pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.4.0 / lucene / src / java / org / apache / lucene / analysis / standard / StandardAnalyzer.java
diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java

deleted file mode 100644 (file)

index eaec2d4..0000000
--- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ /dev/null
@@ -1,136 +0,0 @@
-package org.apache.lucene.analysis.standard;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.util.Version;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Set;
-
-/**
- * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of
- * English stop words.
- *
- * <a name="version"/>
- * <p>You must specify the required {@link Version}
- * compatibility when creating StandardAnalyzer:
- * <ul>
- *   <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- *        from their combining characters. If you use a previous version number,
- *        you get the exact broken behavior for backwards compatibility.
- *   <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
- *        and StopFilter correctly handles Unicode 4.0 supplementary characters
- *        in stopwords.  {@link ClassicTokenizer} and {@link ClassicAnalyzer} 
- *        are the pre-3.1 implementations of StandardTokenizer and
- *        StandardAnalyzer.
- *   <li> As of 2.9, StopFilter preserves position increments
- *   <li> As of 2.4, Tokens incorrectly identified as acronyms
- *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- * </ul>
- */
-public final class StandardAnalyzer extends StopwordAnalyzerBase {
-
-  /** Default maximum allowed token length */
-  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
-  /**
-   * Specifies whether deprecated acronyms should be replaced with HOST type.
-   * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
-   */
-  private final boolean replaceInvalidAcronym;
-
-  /** An unmodifiable set containing some common English words that are usually not
-  useful for searching. */
-  public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; 
-
-  /** Builds an analyzer with the given stop words.
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopWords stop words */
-  public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
-    super(matchVersion, stopWords);
-    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
-  }
-
-  /** Builds an analyzer with the default stop words ({@link
-   * #STOP_WORDS_SET}).
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   */
-  public StandardAnalyzer(Version matchVersion) {
-    this(matchVersion, STOP_WORDS_SET);
-  }
-
-  /** Builds an analyzer with the stop words from the given file.
-   * @see WordlistLoader#getWordSet(File)
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopwords File to read stop words from */
-  public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
-    this(matchVersion, WordlistLoader.getWordSet(stopwords));
-  }
-
-  /** Builds an analyzer with the stop words from the given reader.
-   * @see WordlistLoader#getWordSet(Reader)
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopwords Reader to read stop words from */
-  public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
-    this(matchVersion, WordlistLoader.getWordSet(stopwords));
-  }
-
-  /**
-   * Set maximum allowed token length.  If a token is seen
-   * that exceeds this length then it is discarded.  This
-   * setting only takes effect the next time tokenStream or
-   * reusableTokenStream is called.
-   */
-  public void setMaxTokenLength(int length) {
-    maxTokenLength = length;
-  }
-    
-  /**
-   * @see #setMaxTokenLength
-   */
-  public int getMaxTokenLength() {
-    return maxTokenLength;
-  }
-
-  @Override
-  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
-    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
-    src.setMaxTokenLength(maxTokenLength);
-    src.setReplaceInvalidAcronym(replaceInvalidAcronym);
-    TokenStream tok = new StandardFilter(matchVersion, src);
-    tok = new LowerCaseFilter(matchVersion, tok);
-    tok = new StopFilter(matchVersion, tok, stopwords);
-    return new TokenStreamComponents(src, tok) {
-      @Override
-      protected boolean reset(final Reader reader) throws IOException {
-        src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
-        return super.reset(reader);
-      }
-    };
-  }
-}