X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java new file mode 100644 index 0000000..6fccec8 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java @@ -0,0 +1,131 @@ +package org.apache.lucene.index; +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.*; + +/** + * Store a sorted collection of {@link org.apache.lucene.index.TermVectorEntry}s. Collects all term information + * into a single, SortedSet. + *
+ * NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not + * know what Fields they correlate with. + *
+ * This is not thread-safe + */ +public class SortedTermVectorMapper extends TermVectorMapper{ + + + private SortedSet currentSet; + private Map termToTVE = new HashMap(); + private boolean storeOffsets; + private boolean storePositions; + /** + * Stand-in name for the field in {@link TermVectorEntry}. + */ + public static final String ALL = "_ALL_"; + + /** + * + * @param comparator A Comparator for sorting {@link TermVectorEntry}s + */ + public SortedTermVectorMapper(Comparator comparator) { + this(false, false, comparator); + } + + + public SortedTermVectorMapper(boolean ignoringPositions, boolean ignoringOffsets, Comparator comparator) { + super(ignoringPositions, ignoringOffsets); + currentSet = new TreeSet(comparator); + } + + /** + * + * @param term The term to map + * @param frequency The frequency of the term + * @param offsets Offset information, may be null + * @param positions Position information, may be null + */ + //We need to combine any previous mentions of the term + @Override + public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) { + TermVectorEntry entry = termToTVE.get(term); + if (entry == null) { + entry = new TermVectorEntry(ALL, term, frequency, + storeOffsets == true ? offsets : null, + storePositions == true ? positions : null); + termToTVE.put(term, entry); + currentSet.add(entry); + } else { + entry.setFrequency(entry.getFrequency() + frequency); + if (storeOffsets) + { + TermVectorOffsetInfo [] existingOffsets = entry.getOffsets(); + //A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions + if (existingOffsets != null && offsets != null && offsets.length > 0) + { + //copy over the existing offsets + TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[existingOffsets.length + offsets.length]; + System.arraycopy(existingOffsets, 0, newOffsets, 0, existingOffsets.length); + System.arraycopy(offsets, 0, newOffsets, existingOffsets.length, offsets.length); + entry.setOffsets(newOffsets); + } + else if (existingOffsets == null && offsets != null && offsets.length > 0) + { + entry.setOffsets(offsets); + } + //else leave it alone + } + if (storePositions) + { + int [] existingPositions = entry.getPositions(); + if (existingPositions != null && positions != null && positions.length > 0) + { + int [] newPositions = new int[existingPositions.length + positions.length]; + System.arraycopy(existingPositions, 0, newPositions, 0, existingPositions.length); + System.arraycopy(positions, 0, newPositions, existingPositions.length, positions.length); + entry.setPositions(newPositions); + } + else if (existingPositions == null && positions != null && positions.length > 0) + { + entry.setPositions(positions); + } + } + } + + + } + + @Override + public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) { + + this.storeOffsets = storeOffsets; + this.storePositions = storePositions; + } + + /** + * The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed into the constructor. + *
+ * This set will be empty until after the mapping process takes place. + * + * @return The SortedSet of {@link TermVectorEntry}. + */ + public SortedSet getTermVectorEntrySet() + { + return currentSet; + } + +}