--- /dev/null
+package org.apache.lucene.index;
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A {@link TermVectorMapper} that organizes term vector data by position: for each field it
+ * builds a map from position to the term(s), and optionally the offsets, seen at that position.
+ * Frequency information is ignored.
+ * <p>
+ * Results accumulate over every field announced through
+ * {@link #setExpectations(String, int, boolean, boolean)}; a field announced more than once
+ * keeps only the data collected after its most recent announcement.
+ * <p>
+ * This is not thread-safe.
+ */
+public class PositionBasedTermVectorMapper extends TermVectorMapper {
+  /**
+   * Field name to (position to {@link TVPositionInfo}). Stays <code>null</code> until the first
+   * successful call to {@link #setExpectations(String, int, boolean, boolean)}.
+   */
+  private Map<String, Map<Integer,TVPositionInfo>> fieldToTerms;
+
+  /** Name of the field most recently announced via setExpectations. */
+  private String currentField;
+
+  /** Position to {@link TVPositionInfo} for the field currently being mapped. */
+  private Map<Integer,TVPositionInfo> currentPositions;
+
+  /** Whether the field currently being mapped was reported as having offsets. */
+  private boolean storeOffsets;
+
+  /**
+   * Creates a mapper, passing <code>false</code> for both flags of the superclass constructor.
+   */
+  public PositionBasedTermVectorMapper() {
+    super(false, false);
+  }
+
+  /**
+   * Creates a mapper, forwarding <code>ignoringOffsets</code> to the superclass constructor.
+   *
+   * @param ignoringOffsets value handed to the superclass as its second flag
+   */
+  public PositionBasedTermVectorMapper(boolean ignoringOffsets) {
+    super(false, ignoringOffsets);
+  }
+
+  /**
+   * Never ignores positions. This mapper doesn't make much sense unless there are positions.
+   *
+   * @return false
+   */
+  @Override
+  public boolean isIgnoringPositions() {
+    return false;
+  }
+
+  /**
+   * Callback for the TermVectorReader. Records <code>term</code> at each of its positions in
+   * the map of the current field.
+   *
+   * @param term The term to record
+   * @param frequency Not used by this mapper
+   * @param offsets Offsets parallel to <code>positions</code>; may be <code>null</code>, in
+   *        which case a <code>null</code> offset is recorded for every position
+   * @param positions The positions at which the term occurs
+   */
+  @Override
+  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+    for (int i = 0; i < positions.length; i++) {
+      Integer posVal = Integer.valueOf(positions[i]);
+      TVPositionInfo pos = currentPositions.get(posVal);
+      if (pos == null) {
+        // First term seen at this position for the current field.
+        pos = new TVPositionInfo(positions[i], storeOffsets);
+        currentPositions.put(posVal, pos);
+      }
+      pos.addTerm(term, offsets != null ? offsets[i] : null);
+    }
+  }
+
+  /**
+   * Callback mechanism used by the TermVectorReader. Starts a fresh position map for
+   * <code>field</code> and registers it in the field map, keeping the data already collected
+   * for other fields.
+   *
+   * @param field The field being read
+   * @param numTerms The number of terms in the vector (not used by this mapper)
+   * @param storeOffsets Whether offsets are available
+   * @param storePositions Whether positions are available
+   * @throws RuntimeException if <code>storePositions</code> is false
+   */
+  @Override
+  public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
+    if (!storePositions) {
+      throw new RuntimeException("You must store positions in order to use this Mapper");
+    }
+    // Create the outer map only once: re-creating it per call would discard every field except
+    // the last one. It is keyed by field name, so numTerms is not a meaningful capacity here.
+    if (fieldToTerms == null) {
+      fieldToTerms = new HashMap<String,Map<Integer,TVPositionInfo>>();
+    }
+    this.storeOffsets = storeOffsets;
+    currentField = field;
+    currentPositions = new HashMap<Integer,TVPositionInfo>();
+    fieldToTerms.put(currentField, currentPositions);
+  }
+
+  /**
+   * Get the mapping between fields and the positional information collected for them. The maps
+   * are hash-based and therefore not ordered.
+   *
+   * @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is {@link org.apache.lucene.index.PositionBasedTermVectorMapper.TVPositionInfo}.
+   *         <code>null</code> if no field has been announced to this mapper yet.
+   */
+  public Map<String,Map<Integer,TVPositionInfo>> getFieldToTerms() {
+    return fieldToTerms;
+  }
+
+  /**
+   * Container for the term(s) found at a single position.
+   */
+  public static class TVPositionInfo {
+    private final int position;
+
+    private final List<String> terms;
+
+    /** Parallel to {@link #terms}; <code>null</code> when created with storeOffsets == false. */
+    private final List<TermVectorOffsetInfo> offsets;
+
+    /**
+     * @param position The position this container describes
+     * @param storeOffsets Whether an offsets list should be kept alongside the terms
+     */
+    public TVPositionInfo(int position, boolean storeOffsets) {
+      this.position = position;
+      terms = new ArrayList<String>();
+      offsets = storeOffsets ? new ArrayList<TermVectorOffsetInfo>() : null;
+    }
+
+    /**
+     * Appends a term and, when offsets are being kept, its offset info, which may be
+     * <code>null</code>.
+     */
+    void addTerm(String term, TermVectorOffsetInfo info) {
+      terms.add(term);
+      if (offsets != null) {
+        offsets.add(info);
+      }
+    }
+
+    /**
+     * @return The position of the term
+     */
+    public int getPosition() {
+      return position;
+    }
+
+    /**
+     * Note, there may be multiple terms at the same position.
+     *
+     * @return A List of Strings
+     */
+    public List<String> getTerms() {
+      return terms;
+    }
+
+    /**
+     * Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple entries since there may be multiple terms at a position.
+     *
+     * @return A List of TermVectorOffsetInfo objects if offsets are stored, otherwise
+     *         <code>null</code>. Individual entries are <code>null</code> for terms that were
+     *         added without offset info.
+     */
+    public List<TermVectorOffsetInfo> getOffsets() {
+      return offsets;
+    }
+  }
+
+}