1 package org.apache.lucene.index;
/*
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
25 * For each Field, store position by position information. It ignores frequency information
27 * This is not thread-safe.
29 public class PositionBasedTermVectorMapper extends TermVectorMapper{
30 private Map<String, Map<Integer,TVPositionInfo>> fieldToTerms;
32 private String currentField;
34 * A Map of Integer and TVPositionInfo
36 private Map<Integer,TVPositionInfo> currentPositions;
37 private boolean storeOffsets;
46 public PositionBasedTermVectorMapper() {
50 public PositionBasedTermVectorMapper(boolean ignoringOffsets)
52 super(false, ignoringOffsets);
56 * Never ignores positions. This mapper doesn't make much sense unless there are positions
60 public boolean isIgnoringPositions() {
65 * Callback for the TermVectorReader.
72 public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
73 for (int i = 0; i < positions.length; i++) {
74 Integer posVal = Integer.valueOf(positions[i]);
75 TVPositionInfo pos = currentPositions.get(posVal);
77 pos = new TVPositionInfo(positions[i], storeOffsets);
78 currentPositions.put(posVal, pos);
80 pos.addTerm(term, offsets != null ? offsets[i] : null);
85 * Callback mechanism used by the TermVectorReader
86 * @param field The field being read
87 * @param numTerms The number of terms in the vector
88 * @param storeOffsets Whether offsets are available
89 * @param storePositions Whether positions are available
92 public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
93 if (storePositions == false)
95 throw new RuntimeException("You must store positions in order to use this Mapper");
97 if (storeOffsets == true)
101 fieldToTerms = new HashMap<String,Map<Integer,TVPositionInfo>>(numTerms);
102 this.storeOffsets = storeOffsets;
103 currentField = field;
104 currentPositions = new HashMap<Integer,TVPositionInfo>();
105 fieldToTerms.put(currentField, currentPositions);
109 * Get the mapping between fields and terms, sorted by the comparator
111 * @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is {@link org.apache.lucene.index.PositionBasedTermVectorMapper.TVPositionInfo}.
113 public Map<String,Map<Integer,TVPositionInfo>> getFieldToTerms() {
118 * Container for a term at a position
120 public static class TVPositionInfo{
121 private int position;
123 private List<String> terms;
125 private List<TermVectorOffsetInfo> offsets;
128 public TVPositionInfo(int position, boolean storeOffsets) {
129 this.position = position;
130 terms = new ArrayList<String>();
132 offsets = new ArrayList<TermVectorOffsetInfo>();
136 void addTerm(String term, TermVectorOffsetInfo info)
139 if (offsets != null) {
146 * @return The position of the term
148 public int getPosition() {
153 * Note, there may be multiple terms at the same position
154 * @return A List of Strings
156 public List<String> getTerms() {
161 * Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position
162 * @return A List of TermVectorOffsetInfo objects, if offsets are stored.
164 public List<TermVectorOffsetInfo> getOffsets() {