X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj diff --git a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj b/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj deleted file mode 100644 index 908edd0..0000000 --- a/lucene-java-3.4.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ /dev/null @@ -1,1440 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -options { - STATIC=false; - JAVA_UNICODE_ESCAPE=true; - USER_CHAR_STREAM=true; -} - -PARSER_BEGIN(QueryParser) - -package org.apache.lucene.queryParser; - -import java.io.IOException; -import java.io.StringReader; -import java.text.Collator; -import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CachingTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.document.DateField; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MultiPhraseQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.Version; -import org.apache.lucene.util.VirtualMethod; - -/** - * This class is generated by JavaCC. The most important method is - * {@link #parse(String)}. - * - * The syntax for query strings is as follows: - * A Query is a series of clauses. - * A clause may be prefixed by: - *
+
) or a minus (-
) sign, indicating
- * that the clause is required or prohibited respectively; or
- * +
/-
prefix to require any of a set of
- * terms.
- * - * Query ::= ( Clause )* - * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) - *- * - *
- * Examples of appropriately formatted queries can be found in the query syntax - * documentation. - *
- * - *- * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. - * date:[6/1/2005 TO 6/4/2005] produces a range query that searches - * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format - * of the accepted input depends on {@link #setLocale(Locale) the locale}. - * By default a date is converted into a search term using the deprecated - * {@link DateField} for compatibility reasons. - * To use the new {@link DateTools} to convert dates, a - * {@link org.apache.lucene.document.DateTools.Resolution} has to be set. - *
- *- * The date resolution that shall be used for RangeQueries can be set - * using {@link #setDateResolution(DateTools.Resolution)} - * or {@link #setDateResolution(String, DateTools.Resolution)}. The former - * sets the default date resolution for all fields, whereas the latter can - * be used to set field specific date resolutions. Field specific date - * resolutions take, if set, precedence over the default date resolution. - *
- *- * If you use neither {@link DateField} nor {@link DateTools} in your - * index, you can create your own - * query parser that inherits QueryParser and overwrites - * {@link #getRangeQuery(String, String, String, boolean)} to - * use a different method for date conversion. - *
- * - *Note that QueryParser is not thread-safe.
- * - *NOTE: there is a new QueryParser in contrib, which matches - * the same syntax as this class, but is more modular, - * enabling substantial customization to how a query is created. - * - * - *
NOTE: You must specify the required {@link Version} - * compatibility when creating QueryParser: - *
- * Set to false if phrase queries should only be generated when
- * surrounded by double quotes.
- */
- public final void setAutoGeneratePhraseQueries(boolean value) {
-   // Turning the feature off is rejected unless hasNewAPI is set.
-   if (!value && !hasNewAPI) {
-     throw new IllegalArgumentException("You must implement the new API: getFieldQuery(String,String,boolean)"
-         + " to use setAutoGeneratePhraseQueries(false)");
-   }
-   autoGeneratePhraseQueries = value;
- }
-
- /**
-  * Returns the minimal similarity currently configured for fuzzy queries.
-  *
-  * @return the stored fuzzyMinSim value
-  */
- public float getFuzzyMinSim() {
-   return this.fuzzyMinSim;
- }
-
- /**
-  * Stores the minimum similarity to use for fuzzy queries.
-  * Default is 0.5f.
-  *
-  * @param fuzzyMinSim the minimum similarity to set
-  */
- public void setFuzzyMinSim(float fuzzyMinSim) {
-   this.fuzzyMinSim = fuzzyMinSim;
- }
-
- /**
-  * Returns the prefix length currently configured for fuzzy queries.
-  *
-  * @return the stored fuzzyPrefixLength value
-  */
- public int getFuzzyPrefixLength() {
-   return this.fuzzyPrefixLength;
- }
-
- /**
-  * Stores the prefix length to use for fuzzy queries. Default is 0.
-  *
-  * @param fuzzyPrefixLength the prefix length to set
-  */
- public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
-   this.fuzzyPrefixLength = fuzzyPrefixLength;
- }
-
- /**
-  * Stores the default slop for phrases. A slop of zero requires exact
-  * phrase matches. Default value is zero.
-  *
-  * @param phraseSlop the default phrase slop to set
-  */
- public void setPhraseSlop(int phraseSlop) {
-   this.phraseSlop = phraseSlop;
- }
-
- /**
-  * Returns the default slop for phrases.
-  *
-  * @return the stored phraseSlop value
-  */
- public int getPhraseSlop() {
-   return this.phraseSlop;
- }
-
-
- /**
-  * Set to <code>true</code> to allow leading wildcard characters.
-  * <p>
-  * When set, <code>*</code> or <code>?</code> are allowed as
-  * the first character of a PrefixQuery and WildcardQuery.
-  * Note that this can produce very slow queries on big indexes.
-  * <p>
-  * Default: false.
-  *
-  * @param allowLeadingWildcard whether leading wildcard characters are allowed
-  */
- public void setAllowLeadingWildcard(boolean allowLeadingWildcard) {
-   this.allowLeadingWildcard = allowLeadingWildcard;
- }
-
- /**
-  * Returns whether leading wildcard characters are allowed.
-  *
-  * @return the stored allowLeadingWildcard flag
-  * @see #setAllowLeadingWildcard(boolean)
-  */
- public boolean getAllowLeadingWildcard() {
-   return this.allowLeadingWildcard;
- }
-
- /**
-  * Set to <code>true</code> to enable position increments in result query.
-  * <p>
-  * When set, result phrase and multi-phrase queries will
-  * be aware of position increments.
-  * Useful when e.g. a StopFilter increases the position increment of
-  * the token that follows an omitted token.
-  * <p>
-  * Default: false.
-  *
-  * @param enable whether position increments are enabled
-  */
- public void setEnablePositionIncrements(boolean enable) {
-   enablePositionIncrements = enable;
- }
-
- /**
-  * Returns whether position increments are enabled in result queries.
-  *
-  * @return the stored enablePositionIncrements flag
-  * @see #setEnablePositionIncrements(boolean)
-  */
- public boolean getEnablePositionIncrements() {
-   return this.enablePositionIncrements;
- }
-
- /**
- * Sets the boolean operator of the QueryParser.
- * In default mode (
- * Depending on settings, prefix term may be lower-cased
- * automatically. It will not go through the default Analyzer,
- * however, since normal Analyzers are unlikely to work properly
- * with wildcard templates.
- *
- * Can be overridden by extending classes, to provide custom handling for
- * wildcard queries, which may be necessary due to missing analyzer calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token that contains one or more wild card
- * characters (? or *), but is not simple prefix term
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getWildcardQuery(String field, String termStr) throws ParseException
- {
- if ("*".equals(field)) {
- if ("*".equals(termStr)) return newMatchAllDocsQuery();
- }
- if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?")))
- throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- return newWildcardQuery(t);
- }
-
- /**
- * Factory method for generating a query (similar to
- * {@link #getWildcardQuery}). Called when parser parses an input term
- * token that uses prefix notation; that is, contains a single '*' wildcard
- * character as its last character. Since this is a special case
- * of generic wildcard term, and such a query can be optimized easily,
- * this usually results in a different query object.
- *
- * Depending on settings, a prefix term may be lower-cased
- * automatically. It will not go through the default Analyzer,
- * however, since normal Analyzers are unlikely to work properly
- * with wildcard templates.
- *
- * Can be overridden by extending classes, to provide custom handling for
- * wild card queries, which may be necessary due to missing analyzer calls.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- * (without trailing '*' character!)
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getPrefixQuery(String field, String termStr) throws ParseException
- {
- if (!allowLeadingWildcard && termStr.startsWith("*"))
- throw new ParseException("'*' not allowed as first character in PrefixQuery");
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- return newPrefixQuery(t);
- }
-
- /**
- * Factory method for generating a query (similar to
- * {@link #getWildcardQuery}). Called when parser parses
- * an input term token that has the fuzzy suffix (~) appended.
- *
- * @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
- *
- * @return Resulting {@link Query} built for the term
- * @exception ParseException throw in overridden method to disallow
- */
- protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
- {
- if (lowercaseExpandedTerms) {
- termStr = termStr.toLowerCase();
- }
- Term t = new Term(field, termStr);
- return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
- }
-
- /**
- * Returns a String where the escape char has been
- * removed, or kept only once if there was a double escape.
- *
- * Supports escaped unicode characters, e. g. translates
- * OR_OPERATOR
) terms without any modifiers
- * are considered optional: for example capital of Hungary
is equal to
- * capital OR of OR Hungary
.
- * In AND_OPERATOR
mode terms are considered to be in conjunction: the
- * above mentioned query is parsed as capital AND of AND Hungary
- */
- public void setDefaultOperator(Operator op) {
-   // Remember the operator applied to terms that carry no explicit modifier.
-   operator = op;
- }
-
-
- /**
-  * Returns the implicit operator setting, which will be either AND_OPERATOR
-  * or OR_OPERATOR.
-  *
-  * @return the stored default operator
-  */
- public Operator getDefaultOperator() {
-   return this.operator;
- }
-
-
- /**
-  * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
-  * lower-cased or not. Default is <code>true</code>.
-  *
-  * @param lowercaseExpandedTerms whether expanded terms are lower-cased
-  */
- public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
-   this.lowercaseExpandedTerms = lowercaseExpandedTerms;
- }
-
-
- /**
-  * Returns whether expanded terms are automatically lower-cased.
-  *
-  * @return the stored lowercaseExpandedTerms flag
-  * @see #setLowercaseExpandedTerms(boolean)
-  */
- public boolean getLowercaseExpandedTerms() {
-   return this.lowercaseExpandedTerms;
- }
-
- /**
-  * Changes the rewrite method used for multi-term queries.
-  * <p>
-  * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
-  * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is
-  * generally preferable because it a) runs faster, b) does not have the scarcity of
-  * terms unduly influence score, and c) avoids any "TooManyBooleanClauses" exception.
-  * However, if your application really needs to use the old-fashioned BooleanQuery
-  * expansion rewriting and the above points are not relevant, then use this to change
-  * the rewrite method.
-  *
-  * @param method the rewrite method to store
-  */
- public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
-   this.multiTermRewriteMethod = method;
- }
-
-
- /**
-  * Returns the rewrite method currently stored for multi-term queries.
-  *
-  * @return the stored multiTermRewriteMethod
-  * @see #setMultiTermRewriteMethod
-  */
- public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
-   return this.multiTermRewriteMethod;
- }
-
- /**
-  * Sets the locale used by date range parsing.
-  *
-  * @param locale the locale to store
-  */
- public void setLocale(Locale locale) {
-   this.locale = locale;
- }
-
- /**
-  * Returns the current locale, allowing access by subclasses.
-  *
-  * @return the stored locale
-  */
- public Locale getLocale() {
-   return this.locale;
- }
-
- /**
-  * Sets the default date resolution used by RangeQueries for fields for which no
-  * specific date resolution has been set. Field specific resolutions can be set
-  * with {@link #setDateResolution(String, DateTools.Resolution)}.
-  *
-  * @param dateResolution the default date resolution to store
-  */
- public void setDateResolution(DateTools.Resolution dateResolution) {
-   this.dateResolution = dateResolution;
- }
-
- /**
- * Sets the date resolution used by RangeQueries for a specific field.
- *
- * @param fieldName field for which the date resolution is to be set
- * @param dateResolution date resolution to set
- */
- public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) {
- if (fieldName == null) {
- throw new IllegalArgumentException("Field cannot be null.");
- }
-
- if (fieldToDateResolution == null) {
- // lazily initialize HashMap
- fieldToDateResolution = new HashMap\\u0041
to A
.
- *
- */
- private String discardEscapeChar(String input) throws ParseException {
- // Create char array to hold unescaped char sequence
- char[] output = new char[input.length()];
-
- // The length of the output can be less than the input
- // due to discarded escape chars. This variable holds
- // the actual length of the output
- int length = 0;
-
- // We remember whether the last processed character was
- // an escape character
- boolean lastCharWasEscapeChar = false;
-
- // The multiplier the current unicode digit must be multiplied with.
- // E. g. the first digit must be multiplied with 16^3, the second with 16^2...
- int codePointMultiplier = 0;
-
- // Used to calculate the codepoint of the escaped unicode character
- int codePoint = 0;
-
- for (int i = 0; i < input.length(); i++) {
- char curChar = input.charAt(i);
- if (codePointMultiplier > 0) {
- codePoint += hexToInt(curChar) * codePointMultiplier;
- codePointMultiplier >>>= 4;
- if (codePointMultiplier == 0) {
- output[length++] = (char)codePoint;
- codePoint = 0;
- }
- } else if (lastCharWasEscapeChar) {
- if (curChar == 'u') {
- // found an escaped unicode character
- codePointMultiplier = 16 * 16 * 16;
- } else {
- // this character was escaped
- output[length] = curChar;
- length++;
- }
- lastCharWasEscapeChar = false;
- } else {
- if (curChar == '\\') {
- lastCharWasEscapeChar = true;
- } else {
- output[length] = curChar;
- length++;
- }
- }
- }
-
- if (codePointMultiplier > 0) {
- throw new ParseException("Truncated unicode escape sequence.");
- }
-
- if (lastCharWasEscapeChar) {
- throw new ParseException("Term can not end with escape character.");
- }
-
- return new String(output, 0, length);
- }
-
- /**
-  * Returns the numeric value of the hexadecimal character.
-  *
-  * @param c a character in the ranges 0-9, a-f or A-F
-  * @return the value of the digit, from 0 to 15
-  * @exception ParseException if the character is outside those ranges
-  */
- private static final int hexToInt(char c) throws ParseException {
-   if (c >= '0' && c <= '9') {
-     return c - '0';
-   }
-   if (c >= 'a' && c <= 'f') {
-     return 10 + (c - 'a');
-   }
-   if (c >= 'A' && c <= 'F') {
-     return 10 + (c - 'A');
-   }
-   throw new ParseException("None-hex character in unicode escape sequence: " + c);
- }
-
- /**
- * Returns a String where those characters that QueryParser
- * expects to be escaped are escaped by a preceding \
.
- */
- public static String escape(String s) {
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- // These characters are part of the query syntax and must be escaped
- if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
- || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
- || c == '*' || c == '?' || c == '|' || c == '&') {
- sb.append('\\');
- }
- sb.append(c);
- }
- return sb.toString();
- }
-
- /**
-  * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
-  * Usage:
-  * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
-  * <p>
-  * Parses the first argument against the field "field" and prints the
-  * resulting query. When no argument is given, prints the usage line and exits.
-  *
-  * @param args command line arguments; only the first one is used
-  * @exception Exception if parsing the input fails
-  */
- public static void main(String[] args) throws Exception {
-   if (args.length == 0) {
-     // The usage line names the expected argument, matching the javadoc above.
-     System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
-     System.exit(0);
-   }
-   QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
-       new org.apache.lucene.analysis.SimpleAnalyzer());
-   Query q = qp.parse(args[0]);
-   System.out.println(q.toString("field"));
- }
-}
-
-PARSER_END(QueryParser)
-
-/* ***************** */
-/* Token Definitions */
-/* ***************** */
-
-// Helper character classes shared by the token definitions. The "#" prefix
-// marks each of them as a private regular expression (usable only inside other
-// token definitions); "<*>" makes them available in every lexical state.
-<*> TOKEN : {
-// a single ASCII digit
- <#_NUM_CHAR: ["0"-"9"] >
-// every character that follows a backslash is considered as an escaped character
-| <#_ESCAPED_CHAR: "\\" ~[] >
-// first character of a term: anything except whitespace and the listed
-// query syntax characters, or an escaped character
-| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
- "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
- | <_ESCAPED_CHAR> ) >
-// subsequent term characters additionally allow "-" and "+"
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-// space, tab, newline, carriage return and U+3000
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
-// character inside a quoted string: anything except a double quote or a
-// backslash, or an escaped character
-| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
-}
-
-