X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
deleted file mode 100644
index bd3d796..0000000
--- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package org.apache.lucene.analysis.standard;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopAnalyzer;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.util.Version;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Set;
-
-/**
- * Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of
- * English stop words.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating ClassicAnalyzer:
- * <ul>
- *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
- *         supplementary characters in stopwords
- *   <li> As of 2.9, StopFilter preserves position
- *        increments
- *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- * </ul>
- *
- * ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
- * As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
- * as specified by UAX#29.
- */
-public final class ClassicAnalyzer extends StopwordAnalyzerBase {
-
-  /** Default maximum allowed token length */
-  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-  /**
-   * Specifies whether deprecated acronyms should be replaced with HOST type.
-   * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
-   */
-  private final boolean replaceInvalidAcronym;
-
-  /** An unmodifiable set containing some common English words that are usually not
-  useful for searching. */
-  public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
-  /** Builds an analyzer with the given stop words.
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopWords stop words */
-  public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
-    super(matchVersion, stopWords);
-    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
-  }
-
-  /** Builds an analyzer with the default stop words ({@link
-   * #STOP_WORDS_SET}).
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   */
-  public ClassicAnalyzer(Version matchVersion) {
-    this(matchVersion, STOP_WORDS_SET);
-  }
-
-  /** Builds an analyzer with the stop words from the given file.
-   * @see WordlistLoader#getWordSet(File)
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopwords File to read stop words from */
-  public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
-    this(matchVersion, WordlistLoader.getWordSet(stopwords));
-  }
-
-  /** Builds an analyzer with the stop words from the given reader.
-   * @see WordlistLoader#getWordSet(Reader)
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopwords Reader to read stop words from */
-  public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
-    this(matchVersion, WordlistLoader.getWordSet(stopwords));
-  }
-
-  /**
-   * Set maximum allowed token length.  If a token is seen
-   * that exceeds this length then it is discarded.  This
-   * setting only takes effect the next time tokenStream or
-   * reusableTokenStream is called.
-   */
-  public void setMaxTokenLength(int length) {
-    maxTokenLength = length;
-  }
-
-  /**
-   * @see #setMaxTokenLength
-   */
-  public int getMaxTokenLength() {
-    return maxTokenLength;
-  }
-
-  @Override
-  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
-    final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
-    src.setMaxTokenLength(maxTokenLength);
-    src.setReplaceInvalidAcronym(replaceInvalidAcronym);
-    TokenStream tok = new ClassicFilter(src);
-    tok = new LowerCaseFilter(matchVersion, tok);
-    tok = new StopFilter(matchVersion, tok, stopwords);
-    return new TokenStreamComponents(src, tok) {
-      @Override
-      protected boolean reset(final Reader reader) throws IOException {
-        src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
-        return super.reset(reader);
-      }
-    };
-  }
-}