X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
diff --git a/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java b/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
new file mode 100644
index 0000000..02e00f8
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
@@ -0,0 +1,369 @@
+package org.apache.lucene.queryParser.analyzing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Version;
+
+/**
+ * Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
+ * are also passed through the given analyzer, but wild card characters (like <code>*</code>)
+ * don't get removed from the search terms.
+ *
+ *
+ * <p><b>Warning:</b> This class should only be used with analyzers that do not use stopwords
+ * or that add tokens. Also, several stemming analyzers are inappropriate: for example, GermanAnalyzer
+ * will turn <code>H&auml;user</code> into <code>hau</code>, but <code>H?user</code> will
+ * become <code>h?user</code> when using this parser and thus no match would be found (i.e.
+ * using this parser will be no improvement over QueryParser in such cases).
+ *
+ *
+ * @version $Revision$, $Date$
+ */
+public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {
+
+ /**
+ * Constructs a query parser.
+ * @param field the default field for query terms.
+ * @param analyzer used to find terms in the query text.
+ */
+ public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
+ super(matchVersion, field, analyzer);
+ }
+
+ /**
+  * Called when parser
+  * parses an input term token that contains one or more wildcard
+  * characters (like <code>*</code>), but is not a prefix term token (one
+  * that has just a single * character at the end).
+  * <p>
+  * Example: will be called for <code>H?user</code> or for <code>H*user</code>
+  * but not for <code>*user</code>.
+  * <p>
+  * Depending on analyzer and settings, a wildcard term may (most probably will)
+  * be lower-cased automatically. It will go through the default Analyzer.
+  * <p>
+ * Overrides super class, by passing terms through analyzer.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token that contains one or more wild card
+ * characters (? or *), but is not simple prefix term
+ *
+ * @return Resulting {@link Query} built for the term
+ * @throws ParseException
+ */
+ @Override
+ protected Query getWildcardQuery(String field, String termStr) throws ParseException {
+ List
+ * Depending on analyzer and settings, a prefix term may (most probably will)
+ * be lower-cased automatically. It will go through the default Analyzer.
+ *
+ * Overrides super class, by passing terms through analyzer.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ * (without trailing '*' character!)
+ *
+ * @return Resulting {@link Query} built for the term
+ * @throws ParseException
+ */
+ @Override
+ protected Query getPrefixQuery(String field, String termStr) throws ParseException {
+ // get Analyzer from superclass and tokenize the term
+ TokenStream source;
+ List
+ * Depending on analyzer and settings, a fuzzy term may (most probably will)
+ * be lower-cased automatically. It will go through the default Analyzer.
+ *
+ * Overrides super class, by passing terms through analyzer.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException
+ */
+ @Override
+ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
+ throws ParseException {
+ // get Analyzer from superclass and tokenize the term
+ TokenStream source = null;
+ String nextToken = null;
+ boolean multipleTokens = false;
+
+ try {
+ source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+ source.reset();
+ if (source.incrementToken()) {
+ nextToken = termAtt.toString();
+ }
+ multipleTokens = source.incrementToken();
+ } catch (IOException e) {
+ nextToken = null;
+ }
+
+ try {
+ source.end();
+ source.close();
+ } catch (IOException e) {
+ // ignore
+ }
+
+ if (multipleTokens) {
+ throw new ParseException("Cannot build FuzzyQuery with analyzer " + getAnalyzer().getClass()
+ + " - tokens were added");
+ }
+
+ return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken, minSimilarity);
+ }
+
+ /**
+ * Overrides super class, by passing terms through analyzer.
+ * @exception ParseException
+ */
+ @Override
+ protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive)
+ throws ParseException {
+ // get Analyzer from superclass and tokenize the terms
+ TokenStream source = null;
+ CharTermAttribute termAtt = null;
+ boolean multipleTokens = false;
+
+ if (part1 != null) {
+ // part1
+ try {
+ source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
+ termAtt = source.addAttribute(CharTermAttribute.class);
+ source.reset();
+ multipleTokens = false;
+
+
+ if (source.incrementToken()) {
+ part1 = termAtt.toString();
+ }
+ multipleTokens = source.incrementToken();
+ } catch (IOException e) {
+ // ignore
+ }
+
+ try {
+ source.end();
+ source.close();
+ } catch (IOException e) {
+ // ignore
+ }
+ if (multipleTokens) {
+ throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ + " - tokens were added to part1");
+ }
+ }
+ try {
+ source.close();
+ } catch (IOException e) {
+ // ignore
+ }
+ if (multipleTokens) {
+ throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ + " - tokens were added to part1");
+ }
+
+ if (part2 != null) {
+ try {
+ // part2
+ source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
+ termAtt = source.addAttribute(CharTermAttribute.class);
+ source.reset();
+ if (source.incrementToken()) {
+ part2 = termAtt.toString();
+ }
+ multipleTokens = source.incrementToken();
+ } catch (IOException e) {
+ // ignore
+ }
+ try {
+ source.end();
+ source.close();
+ } catch (IOException e) {
+ // ignore
+ }
+ if (multipleTokens) {
+ throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ + " - tokens were added to part2");
+ }
+ }
+ try {
+ source.close();
+ } catch (IOException e) {
+ // ignore
+ }
+ if (multipleTokens) {
+ throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ + " - tokens were added to part2");
+ }
+ return super.getRangeQuery(field, part1, part2, inclusive);
+ }
+
+}