+++ /dev/null
-package org.apache.lucene.store.instantiated;
-
-/**
- * Copyright 2006 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-import org.apache.lucene.index.Term;
-
-/**
- * A term in the inverted index, coupled to the documents it occurs in.
- *
- * @see org.apache.lucene.index.Term
- */
-public class InstantiatedTerm
- implements Serializable {
-
- private static final long serialVersionUID = 1l;
-
- public static final Comparator<InstantiatedTerm> comparator = new Comparator<InstantiatedTerm>() {
- public int compare(InstantiatedTerm instantiatedTerm, InstantiatedTerm instantiatedTerm1) {
- return instantiatedTerm.getTerm().compareTo(instantiatedTerm1.getTerm());
- }
- };
-
- public static final Comparator<Object> termComparator = new Comparator<Object>() {
- public int compare(Object o, Object o1) {
- return ((InstantiatedTerm)o).getTerm().compareTo((Term)o1);
- }
- };
-
- private Term term;
-
- /**
- * index of term in InstantiatedIndex
- * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
- private int termIndex;
-
- /**
- * @return Term associated with this entry of the index object graph
- */
- public Term getTerm() {
- return term;
- }
-
- InstantiatedTerm(String field, String text) {
- this.term = new Term(field, text);
- }
-
-// this could speed up TermDocs.skipTo even more
-// private Map</** document number*/Integer, /** index in associatedDocuments */Integer> associatedDocumentIndexByDocumentNumber = new HashMap<Integer, Integer>();
-//
-// public Map</** document number*/Integer, /** index in associatedDocuments */Integer> getAssociatedDocumentIndexByDocumentNumber() {
-// return associatedDocumentIndexByDocumentNumber;
-// }
-
- /** Ordered by document number */
- private InstantiatedTermDocumentInformation[] associatedDocuments;
-
- /**
- * Meta data per document in which this term is occurring.
- * Ordered by document number.
- *
- * @return Meta data per document in which this term is occurring.
- */
- public InstantiatedTermDocumentInformation[] getAssociatedDocuments() {
- return associatedDocuments;
- }
-
-
- /**
- * Meta data per document in which this term is occurring.
- * Ordered by document number.
- *
- * @param associatedDocuments meta data per document in which this term is occurring, ordered by document number
- */
- void setAssociatedDocuments(InstantiatedTermDocumentInformation[] associatedDocuments) {
- this.associatedDocuments = associatedDocuments;
- }
-
- /**
- * Finds index to the first beyond the current whose document number is
- * greater than or equal to <i>target</i>, -1 if there is no such element.
- *
- * @param target the document number to match
- * @return -1 if there is no such element
- */
- public int seekCeilingDocumentInformationIndex(int target) {
- return seekCeilingDocumentInformationIndex(target, 0, getAssociatedDocuments().length);
- }
-
- /**
- * Finds index to the first beyond the current whose document number is
- * greater than or equal to <i>target</i>, -1 if there is no such element.
- *
- * @param target the document number to match
- * @param startOffset associated documents index start offset
- * @return -1 if there is no such element
- */
- public int seekCeilingDocumentInformationIndex(int target, int startOffset) {
- return seekCeilingDocumentInformationIndex(target, startOffset, getAssociatedDocuments().length);
- }
-
- /**
- * Finds index to the first beyond the current whose document number is
- * greater than or equal to <i>target</i>, -1 if there is no such element.
- *
- * @param target the document number to match
- * @param startOffset associated documents index start offset
- * @param endPosition associated documents index end position
- * @return -1 if there is no such element
- */
- public int seekCeilingDocumentInformationIndex(int target, int startOffset, int endPosition) {
-
- int pos = binarySearchAssociatedDocuments(target, startOffset, endPosition - startOffset);
-
-// int pos = Arrays.binarySearch(getAssociatedDocuments(), target, InstantiatedTermDocumentInformation.doumentNumberIntegerComparator);
-
- if (pos < 0) {
- pos = -1 - pos;
- }
- if (getAssociatedDocuments().length <= pos) {
- return -1;
- } else {
- return pos;
- }
- }
-
- public int binarySearchAssociatedDocuments(int target) {
- return binarySearchAssociatedDocuments(target, 0);
- }
-
- public int binarySearchAssociatedDocuments(int target, int offset) {
- return binarySearchAssociatedDocuments(target, offset, associatedDocuments.length - offset);
- }
-
- /**
- * @param target value to search for in the array
- * @param offset index of the first valid value in the array
- * @param length number of valid values in the array
- * @return index of an occurrence of key in array, or -(insertionIndex + 1) if key is not contained in array (<i>insertionIndex</i> is then the index at which key could be inserted).
- */
- public int binarySearchAssociatedDocuments(int target, int offset, int length) {
-
- // implementation originally from http://ochafik.free.fr/blog/?p=106
-
- if (length == 0) {
- return -1 - offset;
- }
- int min = offset, max = offset + length - 1;
- int minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
- int maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
-
-
- int nPreviousSteps = 0;
-
- for (; ;) {
-
- // be careful not to compute key - minVal, for there might be an integer overflow.
- if (target <= minVal) return target == minVal ? min : -1 - min;
- if (target >= maxVal) return target == maxVal ? max : -2 - max;
-
- assert min != max;
-
- int pivot;
- // A typical binarySearch algorithm uses pivot = (min + max) / 2.
- // The pivot we use here tries to be smarter and to choose a pivot close to the expectable location of the key.
- // This reduces dramatically the number of steps needed to get to the key.
- // However, it does not work well with a logarithmic distribution of values, for instance.
- // When the key is not found quickly the smart way, we switch to the standard pivot.
- if (nPreviousSteps > 2) {
- pivot = (min + max) >> 1;
- // stop increasing nPreviousSteps from now on
- } else {
- // NOTE: We cannot do the following operations in int precision, because there might be overflows.
- // long operations are slower than float operations with the hardware this was tested on (intel core duo 2, JVM 1.6.0).
- // Overall, using float proved to be the safest and fastest approach.
- pivot = min + (int) ((target - (float) minVal) / (maxVal - (float) minVal) * (max - min));
- nPreviousSteps++;
- }
-
- int pivotVal = getAssociatedDocuments()[pivot].getDocument().getDocumentNumber();
-
- // NOTE: do not store key - pivotVal because of overflows
- if (target > pivotVal) {
- min = pivot + 1;
- max--;
- } else if (target == pivotVal) {
- return pivot;
- } else {
- min++;
- max = pivot - 1;
- }
- maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
- minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
- }
- }
-
-
- /**
- * Navigates to the view of this occurrences of this term in a specific document.
- *
- * This method is only used by InstantiatedIndex(IndexReader) and
- * should not be optimized for less CPU at the cost of more RAM.
- *
- * @param documentNumber the n:th document in the index
- * @return view of this term from specified document
- */
- public InstantiatedTermDocumentInformation getAssociatedDocument(int documentNumber) {
- int pos = binarySearchAssociatedDocuments(documentNumber);
- return pos < 0 ? null : getAssociatedDocuments()[pos];
- }
-
- public final String field() {
- return term.field();
- }
-
- public final String text() {
- return term.text();
- }
-
- @Override
- public String toString() {
- return term.toString();
- }
-
-
- public int getTermIndex() {
- return termIndex;
- }
-
- public void setTermIndex(int termIndex) {
- this.termIndex = termIndex;
- }
-}