1 package org.apache.lucene.analysis.path;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import java.io.IOException;
20 import java.io.Reader;
22 import org.apache.lucene.analysis.Tokenizer;
23 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
24 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
25 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
29 * Take something like:
32 * /something/something/else
 * and make:
 * /something
39 * /something/something
40 * /something/something/else
43 public class PathHierarchyTokenizer extends Tokenizer {
/**
 * Creates a tokenizer over {@code input} by delegating to the five-argument
 * constructor with {@code DEFAULT_BUFFER_SIZE}, {@code DEFAULT_DELIMITER} as both
 * the delimiter and the replacement character, and {@code DEFAULT_SKIP}.
 *
 * @param input the reader supplying the characters to tokenize
 */
45 public PathHierarchyTokenizer(Reader input) {
46 this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
/**
 * Creates a tokenizer over {@code input} with a caller-supplied skip value, using
 * {@code DEFAULT_BUFFER_SIZE} and {@code DEFAULT_DELIMITER} as both the delimiter
 * and the replacement character.
 *
 * @param input the reader supplying the characters to tokenize
 * @param skip  threshold forwarded to the five-argument constructor;
 *              {@code incrementToken()} only appends characters to the term once
 *              its {@code skipped} counter exceeds this value
 */
49 public PathHierarchyTokenizer(Reader input, int skip) {
50 this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
/**
 * Creates a tokenizer over {@code input} with a caller-supplied buffer size and
 * delimiter. The same character is passed as the replacement, and
 * {@code DEFAULT_SKIP} is used for the skip value.
 *
 * @param input      the reader supplying the characters to tokenize
 * @param bufferSize size used for the term buffer and the internal result buffer
 * @param delimiter  the character treated as the path separator; also passed as
 *                   the replacement character
 */
53 public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
54 this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
/**
 * Creates a tokenizer over {@code input} with a caller-supplied delimiter and
 * replacement character, using {@code DEFAULT_BUFFER_SIZE} and {@code DEFAULT_SKIP}.
 *
 * @param input       the reader supplying the characters to tokenize
 * @param delimiter   the character treated as the path separator in the input
 * @param replacement the character appended to the term in place of {@code delimiter}
 */
57 public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
58 this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
/**
 * Creates a tokenizer over {@code input} with a caller-supplied delimiter,
 * replacement character and skip value, using {@code DEFAULT_BUFFER_SIZE}.
 *
 * @param input       the reader supplying the characters to tokenize
 * @param delimiter   the character treated as the path separator in the input
 * @param replacement the character appended to the term in place of {@code delimiter}
 * @param skip        threshold forwarded to the five-argument constructor;
 *                    {@code incrementToken()} only appends characters to the term
 *                    once its {@code skipped} counter exceeds this value
 */
61 public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
62 this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
/**
 * Full constructor; every other constructor in this class delegates to it.
 *
 * @param input       the reader supplying the characters to tokenize
 * @param bufferSize  size passed to {@code termAtt.resizeBuffer} and used as the
 *                    initial capacity of {@code resultToken}
 * @param delimiter   the character treated as the path separator in the input
 * @param replacement the character appended to the term in place of {@code delimiter}
 * @param skip        threshold compared against the {@code skipped} counter in
 *                    {@code incrementToken()}
 */
65 public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
// NOTE(review): no superclass constructor call taking `input` is visible at this
// point -- confirm that the reader is handed to Tokenizer.
// Size the term buffer up front to the requested buffer size.
67 termAtt.resizeBuffer(bufferSize);
69 this.delimiter = delimiter;
70 this.replacement = replacement;
// NOTE(review): the final field `skip` has no visible assignment from the `skip`
// parameter here -- confirm it is assigned, otherwise the class cannot compile.
// Holds the text of the previously emitted token between incrementToken() calls.
72 resultToken = new StringBuilder(bufferSize);
// Buffer size used by the constructors that do not take a bufferSize argument.
75 private static final int DEFAULT_BUFFER_SIZE = 1024;
// Delimiter (and replacement) used by the constructors that do not take one.
76 public static final char DEFAULT_DELIMITER = '/';
// Skip value used by the constructors that do not take one.
77 public static final int DEFAULT_SKIP = 0;
// Character in the input that incrementToken() treats as the path separator.
79 private final char delimiter;
// Character appended to the term wherever the delimiter occurs in the input.
80 private final char replacement;
// Threshold for the `skipped` counter: incrementToken() appends characters to the
// term only while skipped > skip.
81 private final int skip;
// Attributes populated for each token: term text, start/end offsets, position increment.
83 private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
84 private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
85 private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
// Uncorrected start position of emitted tokens; end offsets are computed as
// startPosition + term length.
86 private int startPosition = 0;
// Last corrected end offset computed by incrementToken(); reported by end().
87 private int finalOffset = 0;
// Counter compared against `skip`.
// NOTE(review): presumably counts the leading path parts consumed so far -- the
// increments are not visible from here; confirm.
88 private int skipped = 0;
// NOTE(review): presumably set when the previous token stopped at a delimiter, so
// that the next token starts by appending `replacement` -- confirm where it is set.
89 private boolean endDelimiter = false;
// Text of the previously emitted token; the next token starts with this text.
90 private StringBuilder resultToken;
/**
 * Advances to the next token, building its text in {@code termAtt}.
 * <p>
 * Each call starts from the text kept in {@code resultToken} by the previous call
 * and appends characters read from {@code input}, writing {@code replacement}
 * wherever {@code delimiter} occurs. Characters are appended only while
 * {@code skipped > skip}. Offsets are derived from {@code startPosition} and the
 * term length and are passed through {@code correctOffset}.
 * <p>
 * NOTE(review): the loop, several branch headers and the return statements are not
 * fully visible from here; the inline notes describe individual statements only.
 *
 * @return NOTE(review): presumably {@code true} when a token was produced and
 *         {@code false} once the input is exhausted -- confirm
 * @throws IOException if reading from {@code input} fails
 */
94 public final boolean incrementToken() throws IOException {
// Seed the new term with the text carried over from the previous token.
96 termAtt.append( resultToken );
// No carried-over text means position increment 1; the second call sets it to 0
// (NOTE(review): presumably the else branch, for tokens extending an earlier one).
97 if(resultToken.length() == 0){
98 posAtt.setPositionIncrement(1);
101 posAtt.setPositionIncrement(0);
// NOTE(review): presumably records whether any character was consumed for this
// token -- the places where it becomes true are not visible; confirm.
104 boolean added = false;
// Append the replacement character and clear the pending-delimiter flag.
// NOTE(review): presumably guarded by a check of endDelimiter -- confirm.
106 termAtt.append(replacement);
108 endDelimiter = false;
// Read one character; Reader.read() returns -1 at end of stream, hence the int.
113 int c = input.read();
// Past the skip threshold: finalize the term and its offsets.
115 if( skipped > skip ) {
// Add the carried-over text length so `length` covers the whole term.
116 length += resultToken.length();
117 termAtt.setLength(length);
// End offset = start position + term length; both ends go through correctOffset.
118 finalOffset = correctOffset(startPosition + length);
119 offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
// Keep a copy of the term text so the next call can extend it.
121 resultToken.setLength(0);
122 resultToken.append(termAtt.buffer(), 0, length);
// NOTE(review): the condition under which only finalOffset is updated is not
// visible from here -- confirm.
127 finalOffset = correctOffset(startPosition + length);
// Past the skip threshold: append the character, substituting the replacement
// when it is the delimiter.
134 if( skipped > skip ){
135 termAtt.append(c == delimiter ? replacement : (char)c);
// Delimiter handling. The two skipped > skip checks below are separate tests;
// NOTE(review): the statements between them are not visible -- confirm the flow.
143 if( c == delimiter ){
144 if( skipped > skip ){
149 if( skipped > skip ){
150 termAtt.append(replacement);
// NOTE(review): presumably the non-delimiter case -- append the character as-is
// once past the skip threshold.
158 if( skipped > skip ){
159 termAtt.append((char)c);
// Finalize the token: full term length, offsets, and the copy kept for the next call.
168 length += resultToken.length();
169 termAtt.setLength(length);
170 finalOffset = correctOffset(startPosition + length);
171 offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
172 resultToken.setLength(0);
173 resultToken.append(termAtt.buffer(), 0, length);
/**
 * Sets both the start and the end offset to {@code finalOffset}, the last end
 * offset computed by {@code incrementToken()}.
 */
178 public final void end() {
180 offsetAtt.setOffset(finalOffset, finalOffset);
/**
 * Clears state carried between tokens so the tokenizer can be used on a new reader:
 * empties {@code resultToken} and clears {@code endDelimiter}.
 * <p>
 * NOTE(review): no reset of {@code finalOffset}, {@code skipped} or
 * {@code startPosition}, and no superclass {@code reset} call, is visible from
 * here -- confirm that all per-stream state is reinitialised.
 *
 * @param input the new reader to tokenize
 * @throws IOException declared by the signature; no throwing call is visible here
 */
184 public void reset(Reader input) throws IOException {
// Drop the text carried over from the last token of the previous stream.
186 resultToken.setLength(0);
188 endDelimiter = false;