2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.analysis;
21 import java.io.IOException;
22 import java.io.Reader;
25 import org.apache.lucene.analysis.CharArraySet;
26 import org.apache.lucene.analysis.ReusableAnalyzerBase;
27 import org.apache.lucene.analysis.WordlistLoader;
28 import org.apache.lucene.util.IOUtils;
29 import org.apache.lucene.util.Version;
32 * Base class for Analyzers that need to make use of stopword sets.
35 public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
38 * An immutable stopword set
40 protected final CharArraySet stopwords;
42 protected final Version matchVersion;
/**
 * Returns the analyzer's stopword set, or an empty set if the analyzer has no
 * stopwords.
 *
 * @return the analyzer's stopword set, or an empty set if the analyzer has no
 *         stopwords
 */
51 public Set<?> getStopwordSet() {
/**
 * Creates a new instance initialized with the given stopword set.
 *
 * @param version
 *          the Lucene version for cross version compatibility
 * @param stopwords
 *          the analyzer's stopword set; <code>null</code> selects
 *          {@link CharArraySet#EMPTY_SET}
 */
protected StopwordAnalyzerBase(final Version version, final Set<?> stopwords) {
  matchVersion = version;
  // Stopwords are always held as a CharArraySet: a null argument maps to the
  // shared empty set, any other set is copied and wrapped via unmodifiableSet.
  if (stopwords == null) {
    this.stopwords = CharArraySet.EMPTY_SET;
  } else {
    this.stopwords = CharArraySet.unmodifiableSet(
        CharArraySet.copy(version, stopwords));
  }
}
/**
 * Creates a new Analyzer with an empty stopword set.
 *
 * @param version
 *          the Lucene version for cross version compatibility
 */
76 protected StopwordAnalyzerBase(final Version version) {
81 * Creates a CharArraySet from a file resource associated with a class. (See
82 * {@link Class#getResourceAsStream(String)}).
85 * <code>true</code> if the set should ignore the case of the
86 * stopwords, otherwise <code>false</code>
88 * a class that is associated with the given stopwordResource
90 * name of the resource file associated with the given class
92 * comment string to ignore in the stopword file
93 * @return a CharArraySet containing the distinct stopwords from the given
96 * if loading the stopwords throws an {@link IOException}
98 protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
99 final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
100 final String comment) throws IOException {
101 Reader reader = null;
103 reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8);
104 return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_31, 16, ignoreCase));
106 IOUtils.close(reader);
112 * Creates a CharArraySet from a file.
115 * the stopwords file to load
117 * @param matchVersion
118 * the Lucene version for cross version compatibility
119 * @return a CharArraySet containing the distinct stopwords from the given
121 * @throws IOException
122 * if loading the stopwords throws an {@link IOException}
124 protected static CharArraySet loadStopwordSet(File stopwords,
125 Version matchVersion) throws IOException {
126 Reader reader = null;
128 reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8);
129 return WordlistLoader.getWordSet(reader, matchVersion);
131 IOUtils.close(reader);
136 * Creates a CharArraySet from a file.
139 * the stopwords reader to load
141 * @param matchVersion
142 * the Lucene version for cross version compatibility
143 * @return a CharArraySet containing the distinct stopwords from the given
145 * @throws IOException
146 * if loading the stopwords throws an {@link IOException}
148 protected static CharArraySet loadStopwordSet(Reader stopwords,
149 Version matchVersion) throws IOException {
151 return WordlistLoader.getWordSet(stopwords, matchVersion);
153 IOUtils.close(stopwords);