2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.analysis;
20 import java.io.IOException;
23 import org.apache.lucene.analysis.CharArraySet;
24 import org.apache.lucene.analysis.ReusableAnalyzerBase;
25 import org.apache.lucene.analysis.WordlistLoader;
26 import org.apache.lucene.util.Version;
29 * Base class for Analyzers that need to make use of stopword sets.
32 public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {
35 * An immutable stopword set
37 protected final CharArraySet stopwords;
39 protected final Version matchVersion;
42 * Returns the analyzer's stopword set, or an empty set if the analyzer has no stopwords.
45 * @return the analyzer's stopword set, or an empty set if the analyzer has no stopwords
48 public Set<?> getStopwordSet() {
53 * Creates a new instance initialized with the given stopword set
56 * the Lucene version for cross version compatibility
58 * the analyzer's stopword set
60 protected StopwordAnalyzerBase(final Version version, final Set<?> stopwords) {
61 matchVersion = version;
62 // analyzers should use char array set for stopwords!
63 this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet
64 .unmodifiableSet(CharArraySet.copy(version, stopwords));
68 * Creates a new Analyzer with an empty stopword set
71 * the Lucene version for cross version compatibility
73 protected StopwordAnalyzerBase(final Version version) {
78 * Creates a CharArraySet from a file resource associated with a class. (See
79 * {@link Class#getResourceAsStream(String)}).
82 * <code>true</code> if the set should ignore the case of the
83 * stopwords, otherwise <code>false</code>
85 * a class that is associated with the given stopwordResource
87 * name of the resource file associated with the given class
89 * comment string to ignore in the stopword file
90 * @return a CharArraySet containing the distinct stopwords from the given resource
93 * if loading the stopwords throws an {@link IOException}
95 protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
96 final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
97 final String comment) throws IOException {
98 final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
100 final CharArraySet set = new CharArraySet(Version.LUCENE_31, wordSet.size(), ignoreCase);