1 package org.apache.lucene.analysis.path;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import java.io.IOException;
20 import java.io.Reader;
21 import java.util.ArrayList;
22 import java.util.List;
24 import org.apache.lucene.analysis.Tokenizer;
25 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
26 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
27 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
31 * Take something like:
47 public class ReversePathHierarchyTokenizer extends Tokenizer {
/**
 * Creates a tokenizer over {@code input} using {@code DEFAULT_BUFFER_SIZE},
 * {@code DEFAULT_DELIMITER} as both the delimiter and its replacement, and
 * {@code DEFAULT_SKIP}.
 */
49 public ReversePathHierarchyTokenizer(Reader input) {
50 this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
/**
 * Creates a tokenizer with a caller-supplied {@code skip}; buffer size and
 * delimiter take their defaults and the replacement equals the delimiter.
 */
53 public ReversePathHierarchyTokenizer(Reader input, int skip) {
54 this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
/**
 * Creates a tokenizer with a caller-supplied buffer size and delimiter.
 * The delimiter is also passed as the replacement (i.e. it is emitted unchanged),
 * and {@code DEFAULT_SKIP} is used.
 */
57 public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
58 this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
/**
 * Creates a tokenizer with a caller-supplied delimiter and replacement character,
 * using {@code DEFAULT_BUFFER_SIZE} and {@code DEFAULT_SKIP}.
 */
61 public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
62 this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
/**
 * Creates a tokenizer with a caller-supplied buffer size, delimiter and
 * replacement character, using {@code DEFAULT_SKIP}.
 */
65 public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
66 this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP);
/**
 * Creates a tokenizer with a caller-supplied delimiter and {@code skip}.
 * The delimiter is also passed as the replacement, and
 * {@code DEFAULT_BUFFER_SIZE} is used.
 */
69 public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) {
70 this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip);
/**
 * Creates a tokenizer with a caller-supplied delimiter, replacement character
 * and {@code skip}, using {@code DEFAULT_BUFFER_SIZE}.
 */
73 public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
74 this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
/**
 * Primary constructor; every other constructor delegates here.
 *
 * @param input       source of characters to tokenize
 * @param bufferSize  initial capacity for the term buffer, the accumulation
 *                    builder and the token char array; one tenth of it sizes
 *                    the delimiter-position list
 * @param delimiter   character compared against each input character
 * @param replacement character written to the output in place of the delimiter
 * @param skip        offset used by incrementToken() when choosing the token end
 *                    position and limiting how many tokens are produced
 */
// NOTE(review): 'skip' is a final field, so it must be assigned in this
// constructor; that assignment and any superclass call taking 'input' are not
// among the lines visible here - confirm against the complete source.
// NOTE(review): no validation of bufferSize or skip is visible; a negative
// bufferSize would fail in the allocations below.
77 public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
// Pre-size the term attribute's internal buffer.
79 termAtt.resizeBuffer(bufferSize);
80 this.delimiter = delimiter;
81 this.replacement = replacement;
// Accumulates the input text (with delimiters replaced) while it is being read.
83 resultToken = new StringBuilder(bufferSize);
// Flat copy of the accumulated text that tokens are sliced from.
84 resultTokenBuffer = new char[bufferSize];
// Initial capacity is bufferSize/10; the list can still grow beyond that.
85 delimiterPositions = new ArrayList<Integer>(bufferSize/10);
// Buffer size used by the constructors that do not take a bufferSize argument.
88 private static final int DEFAULT_BUFFER_SIZE = 1024;
// Delimiter (and replacement) used by the constructors that do not take one.
89 public static final char DEFAULT_DELIMITER = '/';
// Skip value used by the constructors that do not take one.
90 public static final int DEFAULT_SKIP = 0;
// Character that marks a boundary in the input.
92 private final char delimiter;
// Character appended to the output wherever the delimiter is read.
93 private final char replacement;
// Subtracted from the boundary count when picking the token end and the number of tokens.
94 private final int skip;
// Receives the text of each emitted token.
96 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
// Receives the (corrected) start/end offsets of each emitted token.
97 private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
// Set to 1 on the call that reads the input, 0 on later calls.
98 private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
// Index in resultTokenBuffer at which every emitted token ends.
100 private int endPosition = 0;
// Corrected offset of the end of the input; reported by end().
101 private int finalOffset = 0;
// Index into delimiterPositions of the next token's start.
// NOTE(review): presumably advanced after each emitted token - the increment is not visible here.
102 private int skipped = 0;
// Scratch builder holding the input with delimiters replaced; emptied once copied out.
103 private StringBuilder resultToken;
// Recorded boundary positions; always starts with 0 once the input has been read.
105 private List<Integer> delimiterPositions;
// Number of recorded boundaries; -1 means the input has not been read yet.
106 private int delimitersCount = -1;
// Character array the token text is copied from; replaced by a larger one when too small.
107 private char[] resultTokenBuffer;
/**
 * Produces the next token.
 *
 * When {@code delimitersCount} is -1 the input is read: each delimiter is
 * recorded as a boundary position and written out as the replacement
 * character, the accumulated text is copied into {@code resultTokenBuffer},
 * and the position increment is set to 1. On later calls the position
 * increment is set to 0. A token runs from the boundary at index
 * {@code skipped} up to {@code endPosition}.
 *
 * @throws IOException if reading from the input fails
 */
// NOTE(review): the read loop header, end-of-input check, updates of 'length'
// and 'skipped', and the return statements are not among the lines visible
// here - confirm them against the complete source before changing this method.
110 public final boolean incrementToken() throws IOException {
// -1 is the "input not consumed yet" marker (restored by reset).
112 if(delimitersCount == -1){
// Offset 0 is always the first boundary, i.e. the start of the longest token.
114 delimiterPositions.add(0);
116 int c = input.read();
// A delimiter records the current 'length' as a boundary and is emitted as 'replacement'.
121 if( c == delimiter ) {
122 delimiterPositions.add(length);
123 resultToken.append(replacement);
// Any other character is copied through unchanged.
126 resultToken.append((char)c);
129 delimitersCount = delimiterPositions.size();
// If the last boundary lies before 'length', add 'length' itself as the final boundary.
// NOTE(review): delimitersCount was captured before this add; any matching update is not visible here.
130 if( delimiterPositions.get(delimitersCount-1) < length ){
131 delimiterPositions.add(length);
// Replace the char buffer with a larger one when the accumulated text does not fit.
134 if( resultTokenBuffer.length < resultToken.length() ){
135 resultTokenBuffer = new char[resultToken.length()];
// Copy the accumulated text out, then empty the builder.
137 resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
138 resultToken.setLength(0);
// All tokens share this end position: the boundary 'skip' entries before the last one.
// NOTE(review): if skip exceeds delimitersCount-1 the index is negative and get() would
// throw IndexOutOfBoundsException; no guard is visible - confirm skip is validated.
139 endPosition = delimiterPositions.get(delimitersCount-1 - skip);
// Remember the corrected end-of-input offset for end().
140 finalOffset = correctOffset(length);
// Position increment is 1 on the call that read the input.
141 posAtt.setPositionIncrement(1);
// NOTE(review): appears to be the branch for later calls (the 'else' line is not visible);
// position increment is 0 there.
144 posAtt.setPositionIncrement(0);
// Emit the slice [delimiterPositions[skipped], endPosition) while boundaries remain.
147 while( skipped < delimitersCount-skip-1 ){
148 int start = delimiterPositions.get(skipped);
149 termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
// Offsets are passed through correctOffset() before being stored.
150 offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
/**
 * Sets both the start and end of the offset attribute to {@code finalOffset},
 * the corrected end-of-input offset computed in incrementToken().
 */
159 public final void end() {
161 offsetAtt.setOffset(finalOffset, finalOffset);
/**
 * Prepares the tokenizer for a new {@code input}: empties the accumulation
 * builder, restores {@code delimitersCount} to -1 so the next
 * incrementToken() call reads the input again, and clears the recorded
 * boundary positions.
 *
 * @throws IOException declared by the signature; no throwing call is visible here
 */
// NOTE(review): any superclass reset call and the resetting of 'skipped' and
// 'finalOffset' are not among the lines visible here - confirm against the
// complete source.
165 public void reset(Reader input) throws IOException {
167 resultToken.setLength(0);
// -1 marks the input as not yet consumed.
170 delimitersCount = -1;
171 delimiterPositions.clear();