--- /dev/null
+package org.apache.lucene.analysis.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+/**
+ * Analyzer that tokenizes with {@link ClassicTokenizer} and then applies
+ * {@link ClassicFilter}, {@link LowerCaseFilter} and {@link StopFilter}, in
+ * that order. English stop words are used unless a custom set is supplied.
+ *
+ * <a name="version"></a>
+ * <p>A {@link Version} must be passed when creating ClassicAnalyzer; it
+ * selects the following compatibility behavior:
+ * <ul>
+ *   <li>As of 3.1, StopFilter correctly handles Unicode 4.0 supplementary
+ *       characters in stopwords</li>
+ *   <li>As of 2.9, StopFilter preserves position increments</li>
+ *   <li>As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+ *       <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)</li>
+ * </ul>
+ *
+ * <p>ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
+ * As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
+ * as specified by UAX#29.
+ */
+public final class ClassicAnalyzer extends StopwordAnalyzerBase {
+
+  /** Maximum token length used when {@link #setMaxTokenLength(int)} is never called. */
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+  /**
+   * Common English words that are usually not useful for searching; this is
+   * the same set as {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}.
+   */
+  public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+  /**
+   * Whether deprecated acronyms should be replaced with HOST type; enabled
+   * when the match version is 2.4 or later. See
+   * <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>.
+   */
+  private final boolean replaceInvalidAcronym;
+
+  /** Current token length limit, handed to the tokenizer on creation and on every reset. */
+  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
+   *
+   * @param matchVersion Lucene version to match; see
+   *        <a href="#version">above</a>
+   */
+  public ClassicAnalyzer(Version matchVersion) {
+    this(matchVersion, STOP_WORDS_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   *
+   * @param matchVersion Lucene version to match; see
+   *        <a href="#version">above</a>
+   * @param stopWords stop words
+   */
+  public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
+    super(matchVersion, stopWords);
+    this.replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
+  }
+
+  /**
+   * Builds an analyzer with the stop words from the given reader.
+   *
+   * @see WordlistLoader#getWordSet(Reader, Version)
+   * @param matchVersion Lucene version to match; see
+   *        <a href="#version">above</a>
+   * @param stopwords Reader to read stop words from
+   * @throws IOException if the stop words cannot be read
+   */
+  public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+    this(matchVersion, WordlistLoader.getWordSet(stopwords, matchVersion));
+  }
+
+  /**
+   * Builds an analyzer with the stop words from the given file, which is
+   * decoded as UTF-8.
+   *
+   * @see WordlistLoader#getWordSet(Reader, Version)
+   * @param matchVersion Lucene version to match; see
+   *        <a href="#version">above</a>
+   * @param stopwords File to read stop words from
+   * @throws IOException if the stop words cannot be read
+   */
+  public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
+    // Hand the decoding reader to the Reader-based constructor, which loads the word set.
+    this(matchVersion, IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8));
+  }
+
+  /**
+   * Returns the maximum allowed token length.
+   *
+   * @see #setMaxTokenLength
+   */
+  public int getMaxTokenLength() {
+    return this.maxTokenLength;
+  }
+
+  /**
+   * Sets the maximum allowed token length. A token that exceeds this length
+   * is discarded. The new value only takes effect the next time tokenStream
+   * or reusableTokenStream is called.
+   *
+   * @param length the new maximum token length
+   */
+  public void setMaxTokenLength(int length) {
+    this.maxTokenLength = length;
+  }
+
+  /**
+   * Assembles the analysis chain for one field: a {@link ClassicTokenizer}
+   * over {@code reader}, wrapped by {@link ClassicFilter},
+   * {@link LowerCaseFilter} and {@link StopFilter}.
+   *
+   * @param fieldName name of the field being analyzed (not used here)
+   * @param reader source of the text to tokenize
+   * @return components whose source is the tokenizer and whose sink is the stop filter
+   */
+  @Override
+  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+    final ClassicTokenizer tokenizer = new ClassicTokenizer(matchVersion, reader);
+    tokenizer.setMaxTokenLength(maxTokenLength);
+    tokenizer.setReplaceInvalidAcronym(replaceInvalidAcronym);
+
+    final TokenStream classic = new ClassicFilter(tokenizer);
+    final TokenStream lowerCased = new LowerCaseFilter(matchVersion, classic);
+    final TokenStream withoutStopWords = new StopFilter(matchVersion, lowerCased, stopwords);
+
+    return new TokenStreamComponents(tokenizer, withoutStopWords) {
+      @Override
+      protected boolean reset(final Reader input) throws IOException {
+        // Re-read the analyzer's limit on each reset so that a later
+        // setMaxTokenLength call reaches this tokenizer as well.
+        tokenizer.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
+        return super.reset(input);
+      }
+    };
+  }
+}