2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.analysis;
20 import java.io.IOException;
21 import java.io.Reader;
24 * A convenience subclass of Analyzer that makes it easy to implement
25 * {@link TokenStream} reuse.
27 * ReusableAnalyzerBase is a simplification of Analyzer that supports easy reuse
28 * for the most common use-cases. Analyzers such as
29 * {@link PerFieldAnalyzerWrapper} that behave differently depending upon the
30 * field name need to subclass Analyzer directly instead.
33 * To prevent consistency problems, this class does not allow subclasses to
34 * override {@link #reusableTokenStream(String, Reader)} or
35 * {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
36 * implement {@link #createComponents(String, Reader)}.
39 public abstract class ReusableAnalyzerBase extends Analyzer {
42 * Creates a new {@link TokenStreamComponents} instance for this analyzer.
45 * the name of the field whose content is passed to the
46 * {@link TokenStreamComponents} sink as a reader
48 * the reader passed to the {@link Tokenizer} constructor
49 * @return the {@link TokenStreamComponents} for this analyzer.
// Invoked by tokenStream on every call, and by reusableTokenStream only when there are no
// stored components or their reset(Reader) returned false.
51 protected abstract TokenStreamComponents createComponents(String fieldName,
55 * This method uses {@link #createComponents(String, Reader)} to obtain an
56 * instance of {@link TokenStreamComponents}. It returns the sink of the
57 * components and stores the components internally. Subsequent calls to this
58 * method will reuse the previously stored components if and only if the
59 * {@link TokenStreamComponents#reset(Reader)} method returned
60 * <code>true</code>. Otherwise a new instance of
61 * {@link TokenStreamComponents} is created.
63 * @param fieldName the name of the field the created TokenStream is used for
64 * @param reader the reader the streams source reads from
67 public final TokenStream reusableTokenStream(final String fieldName,
68 final Reader reader) throws IOException {
// Look up the components a previous call handed to setPreviousTokenStream; the null check
// below covers the case where nothing has been stored yet.
69 TokenStreamComponents streamChain = (TokenStreamComponents)
70 getPreviousTokenStream();
// Pass the caller's reader through the initReader hook first, so the reuse path (reset)
// and the create path (createComponents) both receive the same reader instance.
71 final Reader r = initReader(reader);
// reset(r) is attempted only when stored components exist (short-circuit on null). A
// false result means they cannot be reused, so a new chain is built and stored instead.
72 if (streamChain == null || !streamChain.reset(r)) {
73 streamChain = createComponents(fieldName, r);
74 setPreviousTokenStream(streamChain);
76 return streamChain.getTokenStream();
80 * This method uses {@link #createComponents(String, Reader)} to obtain an
81 * instance of {@link TokenStreamComponents} and returns the sink of the
82 * components. Each call to this method will create a new instance of
83 * {@link TokenStreamComponents}. Created {@link TokenStream} instances are
86 * @param fieldName the name of the field the created TokenStream is used for
87 * @param reader the reader the streams source reads from
90 public final TokenStream tokenStream(final String fieldName,
91 final Reader reader) {
// No caching on this path: each call builds new components from the reader returned by
// initReader and returns their token stream; setPreviousTokenStream is never called here.
92 return createComponents(fieldName, initReader(reader)).getTokenStream();
96 * Override this if you want to add a CharFilter chain.
98 protected Reader initReader(Reader reader) {
// Both tokenStream and reusableTokenStream pass the caller's reader through this hook and
// use the returned Reader in place of the original one.
103 * This class encapsulates the outer components of a token stream. It provides
104 * access to the source ({@link Tokenizer}) and the outer end (sink), an
105 * instance of {@link TokenFilter} which also serves as the
106 * {@link TokenStream} returned by
107 * {@link Analyzer#tokenStream(String, Reader)} and
108 * {@link Analyzer#reusableTokenStream(String, Reader)}.
110 public static class TokenStreamComponents {
// Head of the chain; reset(Reader) forwards the new reader to this tokenizer.
111 protected final Tokenizer source;
// Outer end of the chain, typed as a plain TokenStream.
112 protected final TokenStream sink;
115 * Creates a new {@link TokenStreamComponents} instance.
118 * the analyzer's tokenizer
120 * the analyzer's resulting token stream
122 public TokenStreamComponents(final Tokenizer source,
123 final TokenStream result) {
// Keep the tokenizer so that reset(Reader) can later point it at a new reader.
124 this.source = source;
129 * Creates a new {@link TokenStreamComponents} instance.
132 * the analyzer's tokenizer
134 public TokenStreamComponents(final Tokenizer source) {
// Tokenizer-only variant: the caller supplies no separate result stream.
135 this.source = source;
140 * Resets the encapsulated components with the given reader. This method by
141 * default returns <code>true</code> indicating that the components have
142 * been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
143 * their own {@link TokenStreamComponents} returning <code>false</code> if
144 * the components cannot be reset.
147 * a reader to reset the source component
148 * @return <code>true</code> if the components were reset, otherwise
150 * @throws IOException
151 * if the component's reset method throws an {@link IOException}
153 protected boolean reset(final Reader reader) throws IOException {
// Re-point the tokenizer at the new reader. An IOException raised here propagates to the
// caller; reusableTokenStream, which invokes this method, declares it as well.
154 source.reset(reader);
159 * Returns the sink {@link TokenStream}
161 * @return the sink {@link TokenStream}
163 protected TokenStream getTokenStream() {
// The value returned here is what tokenStream and reusableTokenStream hand back to callers.