X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java diff --git a/lucene-java-3.4.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java b/lucene-java-3.4.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java deleted file mode 100644 index b893e04..0000000 --- a/lucene-java-3.4.0/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java +++ /dev/null @@ -1,249 +0,0 @@ -package org.apache.lucene.store.instantiated; - -/** - * Copyright 2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.Serializable; -import java.util.Comparator; - -import org.apache.lucene.index.Term; - -/** - * A term in the inverted index, coupled to the documents it occurs in. - * - * @see org.apache.lucene.index.Term - */ -public class InstantiatedTerm - implements Serializable { - - private static final long serialVersionUID = 1l; - - public static final Comparator comparator = new Comparator() { - public int compare(InstantiatedTerm instantiatedTerm, InstantiatedTerm instantiatedTerm1) { - return instantiatedTerm.getTerm().compareTo(instantiatedTerm1.getTerm()); - } - }; - - public static final Comparator termComparator = new Comparator() { - public int compare(Object o, Object o1) { - return ((InstantiatedTerm)o).getTerm().compareTo((Term)o1); - } - }; - - private Term term; - - /** - * index of term in InstantiatedIndex - * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */ - private int termIndex; - - /** - * @return Term associated with this entry of the index object graph - */ - public Term getTerm() { - return term; - } - - InstantiatedTerm(String field, String text) { - this.term = new Term(field, text); - } - -// this could speed up TermDocs.skipTo even more -// private Map associatedDocumentIndexByDocumentNumber = new HashMap(); -// -// public Map getAssociatedDocumentIndexByDocumentNumber() { -// return associatedDocumentIndexByDocumentNumber; -// } - - /** Ordered by document number */ - private InstantiatedTermDocumentInformation[] associatedDocuments; - - /** - * Meta data per document in which this term is occurring. - * Ordered by document number. - * - * @return Meta data per document in which this term is occurring. - */ - public InstantiatedTermDocumentInformation[] getAssociatedDocuments() { - return associatedDocuments; - } - - - /** - * Meta data per document in which this term is occurring. - * Ordered by document number. - * - * @param associatedDocuments meta data per document in which this term is occurring, ordered by document number - */ - void setAssociatedDocuments(InstantiatedTermDocumentInformation[] associatedDocuments) { - this.associatedDocuments = associatedDocuments; - } - - /** - * Finds index to the first beyond the current whose document number is - * greater than or equal to target, -1 if there is no such element. - * - * @param target the document number to match - * @return -1 if there is no such element - */ - public int seekCeilingDocumentInformationIndex(int target) { - return seekCeilingDocumentInformationIndex(target, 0, getAssociatedDocuments().length); - } - - /** - * Finds index to the first beyond the current whose document number is - * greater than or equal to target, -1 if there is no such element. - * - * @param target the document number to match - * @param startOffset associated documents index start offset - * @return -1 if there is no such element - */ - public int seekCeilingDocumentInformationIndex(int target, int startOffset) { - return seekCeilingDocumentInformationIndex(target, startOffset, getAssociatedDocuments().length); - } - - /** - * Finds index to the first beyond the current whose document number is - * greater than or equal to target, -1 if there is no such element. - * - * @param target the document number to match - * @param startOffset associated documents index start offset - * @param endPosition associated documents index end position - * @return -1 if there is no such element - */ - public int seekCeilingDocumentInformationIndex(int target, int startOffset, int endPosition) { - - int pos = binarySearchAssociatedDocuments(target, startOffset, endPosition - startOffset); - -// int pos = Arrays.binarySearch(getAssociatedDocuments(), target, InstantiatedTermDocumentInformation.doumentNumberIntegerComparator); - - if (pos < 0) { - pos = -1 - pos; - } - if (getAssociatedDocuments().length <= pos) { - return -1; - } else { - return pos; - } - } - - public int binarySearchAssociatedDocuments(int target) { - return binarySearchAssociatedDocuments(target, 0); - } - - public int binarySearchAssociatedDocuments(int target, int offset) { - return binarySearchAssociatedDocuments(target, offset, associatedDocuments.length - offset); - } - - /** - * @param target value to search for in the array - * @param offset index of the first valid value in the array - * @param length number of valid values in the array - * @return index of an occurrence of key in array, or -(insertionIndex + 1) if key is not contained in array (insertionIndex is then the index at which key could be inserted). - */ - public int binarySearchAssociatedDocuments(int target, int offset, int length) { - - // implementation originally from http://ochafik.free.fr/blog/?p=106 - - if (length == 0) { - return -1 - offset; - } - int min = offset, max = offset + length - 1; - int minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber(); - int maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber(); - - - int nPreviousSteps = 0; - - for (; ;) { - - // be careful not to compute key - minVal, for there might be an integer overflow. - if (target <= minVal) return target == minVal ? min : -1 - min; - if (target >= maxVal) return target == maxVal ? max : -2 - max; - - assert min != max; - - int pivot; - // A typical binarySearch algorithm uses pivot = (min + max) / 2. - // The pivot we use here tries to be smarter and to choose a pivot close to the expectable location of the key. - // This reduces dramatically the number of steps needed to get to the key. - // However, it does not work well with a logarithmic distribution of values, for instance. - // When the key is not found quickly the smart way, we switch to the standard pivot. - if (nPreviousSteps > 2) { - pivot = (min + max) >> 1; - // stop increasing nPreviousSteps from now on - } else { - // NOTE: We cannot do the following operations in int precision, because there might be overflows. - // long operations are slower than float operations with the hardware this was tested on (intel core duo 2, JVM 1.6.0). - // Overall, using float proved to be the safest and fastest approach. - pivot = min + (int) ((target - (float) minVal) / (maxVal - (float) minVal) * (max - min)); - nPreviousSteps++; - } - - int pivotVal = getAssociatedDocuments()[pivot].getDocument().getDocumentNumber(); - - // NOTE: do not store key - pivotVal because of overflows - if (target > pivotVal) { - min = pivot + 1; - max--; - } else if (target == pivotVal) { - return pivot; - } else { - min++; - max = pivot - 1; - } - maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber(); - minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber(); - } - } - - - /** - * Navigates to the view of this occurrences of this term in a specific document. - * - * This method is only used by InstantiatedIndex(IndexReader) and - * should not be optimized for less CPU at the cost of more RAM. - * - * @param documentNumber the n:th document in the index - * @return view of this term from specified document - */ - public InstantiatedTermDocumentInformation getAssociatedDocument(int documentNumber) { - int pos = binarySearchAssociatedDocuments(documentNumber); - return pos < 0 ? null : getAssociatedDocuments()[pos]; - } - - public final String field() { - return term.field(); - } - - public final String text() { - return term.text(); - } - - @Override - public String toString() { - return term.toString(); - } - - - public int getTermIndex() { - return termIndex; - } - - public void setTermIndex(int termIndex) { - this.termIndex = termIndex; - } -}