1 package org.apache.lucene.index;
3 * Copyright 2007 The Apache Software Foundation
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
/**
 * Store a sorted collection of {@link org.apache.lucene.index.TermVectorEntry}s. Collects all term information
 * into a single SortedSet.
 * <p>
 * NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not
 * know what Fields they correlate with.
 * <p>
 * This is not thread-safe.
 */
29 public class SortedTermVectorMapper extends TermVectorMapper{
32 private SortedSet<TermVectorEntry> currentSet;
33 private Map<String,TermVectorEntry> termToTVE = new HashMap<String,TermVectorEntry>();
34 private boolean storeOffsets;
35 private boolean storePositions;
37 * Stand-in name for the field in {@link TermVectorEntry}.
39 public static final String ALL = "_ALL_";
/**
 * Creates a mapper with both "ignoring" flags switched off; delegates to the
 * three-argument constructor with {@code false, false}.
 *
 * @param comparator A Comparator for sorting {@link TermVectorEntry}s
 */
public SortedTermVectorMapper(Comparator<TermVectorEntry> comparator) {
  this(false, false, comparator);
}
/**
 * Creates a mapper whose entry set is a TreeSet ordered by the given comparator.
 *
 * @param ignoringPositions passed through unchanged to the TermVectorMapper constructor
 * @param ignoringOffsets passed through unchanged to the TermVectorMapper constructor
 * @param comparator A Comparator for sorting {@link TermVectorEntry}s
 */
public SortedTermVectorMapper(boolean ignoringPositions, boolean ignoringOffsets, Comparator<TermVectorEntry> comparator) {
  super(ignoringPositions, ignoringOffsets);
  this.currentSet = new TreeSet<TermVectorEntry>(comparator);
}
57 * @param term The term to map
58 * @param frequency The frequency of the term
59 * @param offsets Offset information, may be null
60 * @param positions Position information, may be null
62 //We need to combine any previous mentions of the term
64 public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
65 TermVectorEntry entry = termToTVE.get(term);
67 entry = new TermVectorEntry(ALL, term, frequency,
68 storeOffsets == true ? offsets : null,
69 storePositions == true ? positions : null);
70 termToTVE.put(term, entry);
71 currentSet.add(entry);
73 entry.setFrequency(entry.getFrequency() + frequency);
76 TermVectorOffsetInfo [] existingOffsets = entry.getOffsets();
77 //A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions
78 if (existingOffsets != null && offsets != null && offsets.length > 0)
80 //copy over the existing offsets
81 TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[existingOffsets.length + offsets.length];
82 System.arraycopy(existingOffsets, 0, newOffsets, 0, existingOffsets.length);
83 System.arraycopy(offsets, 0, newOffsets, existingOffsets.length, offsets.length);
84 entry.setOffsets(newOffsets);
86 else if (existingOffsets == null && offsets != null && offsets.length > 0)
88 entry.setOffsets(offsets);
94 int [] existingPositions = entry.getPositions();
95 if (existingPositions != null && positions != null && positions.length > 0)
97 int [] newPositions = new int[existingPositions.length + positions.length];
98 System.arraycopy(existingPositions, 0, newPositions, 0, existingPositions.length);
99 System.arraycopy(positions, 0, newPositions, existingPositions.length, positions.length);
100 entry.setPositions(newPositions);
102 else if (existingPositions == null && positions != null && positions.length > 0)
104 entry.setPositions(positions);
113 public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
115 this.storeOffsets = storeOffsets;
116 this.storePositions = storePositions;
120 * The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed into the constructor.
122 * This set will be empty until after the mapping process takes place.
124 * @return The SortedSet of {@link TermVectorEntry}.
126 public SortedSet<TermVectorEntry> getTermVectorEntrySet()