X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj?ds=sidebyside diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj new file mode 100644 index 0000000..908edd0 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -0,0 +1,1440 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(QueryParser) + +package org.apache.lucene.queryParser; + +import java.io.IOException; +import java.io.StringReader; +import java.text.Collator; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.DateField; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.util.Version; +import org.apache.lucene.util.VirtualMethod; + +/** + * This class is generated by JavaCC. The most important method is + * {@link #parse(String)}. + * + * The syntax for query strings is as follows: + * A Query is a series of clauses. + * A clause may be prefixed by: + *
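+ * <ul>
+ * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
+ * that the clause is required or prohibited respectively; or
+ * <li> a term followed by a colon, indicating the field to be searched.
+ * </ul>
+ *
+ * A clause may be either a term or a nested query enclosed in parentheses.
+ * Note that a nested query may be used with a
+ * <code>+</code>/<code>-</code> prefix to require any of a set of
+ * terms.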
+ * + * Query ::= ( Clause )* + * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) + *+ * + *
+ * Examples of appropriately formatted queries can be found in the query syntax + * documentation. + *
+ * + *+ * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. + * date:[6/1/2005 TO 6/4/2005] produces a range query that searches + * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format + * of the accepted input depends on {@link #setLocale(Locale) the locale}. + * By default a date is converted into a search term using the deprecated + * {@link DateField} for compatibility reasons. + * To use the new {@link DateTools} to convert dates, a + * {@link org.apache.lucene.document.DateTools.Resolution} has to be set. + *
+ *+ * The date resolution that shall be used for RangeQueries can be set + * using {@link #setDateResolution(DateTools.Resolution)} + * or {@link #setDateResolution(String, DateTools.Resolution)}. The former + * sets the default date resolution for all fields, whereas the latter can + * be used to set field specific date resolutions. Field specific date + * resolutions take, if set, precedence over the default date resolution. + *
+ *+ * If you use neither {@link DateField} nor {@link DateTools} in your + * index, you can create your own + * query parser that inherits QueryParser and overwrites + * {@link #getRangeQuery(String, String, String, boolean)} to + * use a different method for date conversion. + *
+ * + *Note that QueryParser is not thread-safe.
+ * + *NOTE: there is a new QueryParser in contrib, which matches + * the same syntax as this class, but is more modular, + * enabling substantial customization to how a query is created. + * + * + *
NOTE: You must specify the required {@link Version} + * compatibility when creating QueryParser: + *
+ * Set to false if phrase queries should only be generated when
+ * surrounded by double quotes.
+ */
+ /**
+  * Sets whether phrase queries are generated automatically. Set to
+  * <code>false</code> if phrase queries should only be generated when
+  * surrounded by double quotes.
+  *
+  * @param value the new setting
+  * @throws IllegalArgumentException if <code>value</code> is <code>false</code>
+  *         while <code>hasNewAPI</code> is not set
+  */
+ public final void setAutoGeneratePhraseQueries(boolean value) {
+   final boolean disabling = !value;
+   if (disabling && !hasNewAPI) {
+     throw new IllegalArgumentException(
+         "You must implement the new API: getFieldQuery(String,String,boolean)"
+         + " to use setAutoGeneratePhraseQueries(false)");
+   }
+   autoGeneratePhraseQueries = value;
+ }
+
+ /**
+  * Returns the minimum similarity currently configured for fuzzy queries.
+  *
+  * @return the value last passed to {@link #setFuzzyMinSim(float)}, or the default
+  */
+ public float getFuzzyMinSim() {
+   return this.fuzzyMinSim;
+ }
+
+ /**
+ * Set the minimum similarity for fuzzy queries.
+ * Default is 0.5f.
+ *
+ * @param fuzzyMinSim the minimum similarity to store; returned unchanged by
+ * {@link #getFuzzyMinSim()}
+ */
+ public void setFuzzyMinSim(float fuzzyMinSim) {
+ this.fuzzyMinSim = fuzzyMinSim;
+ }
+
+ /**
+  * Returns the prefix length currently configured for fuzzy queries.
+  *
+  * @return the fuzzy prefix length
+  */
+ public int getFuzzyPrefixLength() {
+   return this.fuzzyPrefixLength;
+ }
+
+ /**
+ * Set the prefix length for fuzzy queries. Default is 0.
+ * The stored value is the one handed to <code>newFuzzyQuery</code> by
+ * <code>getFuzzyQuery</code>.
+ *
+ * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
+ */
+ public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
+ this.fuzzyPrefixLength = fuzzyPrefixLength;
+ }
+
+ /**
+ * Sets the default slop for phrases. If zero, then exact phrase matches
+ * are required. Default value is zero.
+ *
+ * @param phraseSlop the default phrase slop to store
+ */
+ public void setPhraseSlop(int phraseSlop) {
+ this.phraseSlop = phraseSlop;
+ }
+
+ /**
+  * Returns the default slop applied to phrases.
+  *
+  * @return the default phrase slop
+  * @see #setPhraseSlop(int)
+  */
+ public int getPhraseSlop() {
+   return this.phraseSlop;
+ }
+
+
+ /**
+ * Set to <code>true</code> to allow leading wildcard characters.
+ * <p>
+ * When set, <code>*</code> or <code>?</code> are allowed as
+ * the first character of a PrefixQuery and WildcardQuery.
+ * Note that this can produce very slow
+ * queries on big indexes.
+ * <p>
+ * Default: false.
+ *
+ * @param allowLeadingWildcard whether a leading <code>*</code> or <code>?</code>
+ * is accepted
+ */
+ public void setAllowLeadingWildcard(boolean allowLeadingWildcard) {
+ this.allowLeadingWildcard = allowLeadingWildcard;
+ }
+
+ /**
+  * Tells whether leading wildcard characters are currently accepted.
+  *
+  * @return the flag stored by {@link #setAllowLeadingWildcard(boolean)}
+  * @see #setAllowLeadingWildcard(boolean)
+  */
+ public boolean getAllowLeadingWildcard() {
+   return this.allowLeadingWildcard;
+ }
+
+ /**
+  * Set to <code>true</code> to enable position increments in result query.
+  * <p>
+  * When set, result phrase and multi-phrase queries will
+  * be aware of position increments.
+  * Useful when e.g. a StopFilter increases the position increment of
+  * the token that follows an omitted token.
+  * <p>
+  * Default: false.
+  *
+  * @param enable whether position increments are taken into account
+  */
+ public void setEnablePositionIncrements(boolean enable) {
+   enablePositionIncrements = enable;
+ }
+
+ /**
+  * Tells whether position increments are enabled for result queries.
+  *
+  * @return the flag stored by {@link #setEnablePositionIncrements(boolean)}
+  * @see #setEnablePositionIncrements(boolean)
+  */
+ public boolean getEnablePositionIncrements() {
+   return this.enablePositionIncrements;
+ }
+
+ /**
+ * Sets the boolean operator of the QueryParser.
+ * In default mode (
+ * Depending on settings, prefix term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with wildcard templates.
+ *
+ * Can be overridden by extending classes, to provide custom handling for
+ * wildcard queries, which may be necessary due to missing analyzer calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token that contains one or more wild card
+ * characters (? or *), but is not simple prefix term
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getWildcardQuery(String field, String termStr) throws ParseException
+ {
+ if ("*".equals(field)) {
+ if ("*".equals(termStr)) return newMatchAllDocsQuery();
+ }
+ if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?")))
+ throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return newWildcardQuery(t);
+ }
+
+ /**
+ * Factory method for generating a query (similar to
+ * {@link #getWildcardQuery}). Called when parser parses an input term
+ * token that uses prefix notation; that is, contains a single '*' wildcard
+ * character as its last character. Since this is a special case
+ * of generic wildcard term, and such a query can be optimized easily,
+ * this usually results in a different query object.
+ *
+ * Depending on settings, a prefix term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with wildcard templates.
+ *
+ * Can be overridden by extending classes, to provide custom handling for
+ * wild card queries, which may be necessary due to missing analyzer calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ * (without trailing '*' character!)
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getPrefixQuery(String field, String termStr) throws ParseException
+ {
+ if (!allowLeadingWildcard && termStr.startsWith("*"))
+ throw new ParseException("'*' not allowed as first character in PrefixQuery");
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return newPrefixQuery(t);
+ }
+
+ /**
+ * Factory method for generating a query (similar to
+ * {@link #getWildcardQuery}). Called when parser parses
+ * an input term token that has the fuzzy suffix (~) appended.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
+ }
+
+ /**
+  * Sets the boolean operator of the QueryParser.
+  * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
+  * are considered optional: for example <code>capital of Hungary</code> is equal to
+  * <code>capital OR of OR Hungary</code>.<br/>
+  * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
+  * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
+  *
+  * @param op the operator to apply between terms that carry no modifier
+  */
+ public void setDefaultOperator(Operator op) {
+   operator = op;
+ }
+
+
+ /**
+  * Gets implicit operator setting, which will be either AND_OPERATOR
+  * or OR_OPERATOR.
+  *
+  * @return the operator stored by {@link #setDefaultOperator(Operator)}
+  */
+ public Operator getDefaultOperator() {
+   return this.operator;
+ }
+
+
+ /**
+ * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
+ * lower-cased or not. Default is <code>true</code>.
+ *
+ * @param lowercaseExpandedTerms <code>true</code> to lower-case such terms
+ */
+ public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
+ this.lowercaseExpandedTerms = lowercaseExpandedTerms;
+ }
+
+
+ /**
+  * Tells whether expanded terms are lower-cased automatically.
+  *
+  * @return the flag stored by {@link #setLowercaseExpandedTerms(boolean)}
+  * @see #setLowercaseExpandedTerms(boolean)
+  */
+ public boolean getLowercaseExpandedTerms() {
+   return this.lowercaseExpandedTerms;
+ }
+
+ /**
+  * Changes the rewrite method used for multi-term queries.
+  * <p>
+  * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+  * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is
+  * generally preferable because it a) runs faster, b) does not have the scarcity of
+  * terms unduly influence score, and c) avoids any "TooManyBooleanClauses" exception.
+  * However, if your application really needs to use the
+  * old-fashioned BooleanQuery expansion rewriting and the above
+  * points are not relevant then use this to change
+  * the rewrite method.
+  *
+  * @param method the rewrite method to store
+  */
+ public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
+   this.multiTermRewriteMethod = method;
+ }
+
+
+ /**
+  * Returns the rewrite method currently configured for multi-term queries.
+  *
+  * @return the method stored by {@link #setMultiTermRewriteMethod}
+  * @see #setMultiTermRewriteMethod
+  */
+ public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
+   return this.multiTermRewriteMethod;
+ }
+
+ /**
+ * Set locale used by date range parsing.
+ *
+ * @param locale the locale to store; returned unchanged by {@link #getLocale()}
+ */
+ public void setLocale(Locale locale) {
+ this.locale = locale;
+ }
+
+ /**
+  * Returns current locale, allowing access by subclasses.
+  *
+  * @return the locale stored by {@link #setLocale(Locale)}
+  */
+ public Locale getLocale() {
+   return this.locale;
+ }
+
+ /**
+ * Sets the default date resolution used by RangeQueries for fields for which no
+ * specific date resolution has been set. Field specific resolutions can be set
+ * with {@link #setDateResolution(String, DateTools.Resolution)}.
+ *
+ * @param dateResolution the default date resolution to set
+ */
+ public void setDateResolution(DateTools.Resolution dateResolution) {
+ this.dateResolution = dateResolution;
+ }
+
+ /**
+ * Sets the date resolution used by RangeQueries for a specific field.
+ *
+ * @param fieldName field for which the date resolution is to be set
+ * @param dateResolution date resolution to set
+ */
+ public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) {
+ if (fieldName == null) {
+ throw new IllegalArgumentException("Field cannot be null.");
+ }
+
+ if (fieldToDateResolution == null) {
+ // lazily initialize HashMap
+ fieldToDateResolution = new HashMap\\u0041
to A
.
+ *
+ */
+ private String discardEscapeChar(String input) throws ParseException {
+ // Create char array to hold unescaped char sequence
+ char[] output = new char[input.length()];
+
+ // The length of the output can be less than the input
+ // due to discarded escape chars. This variable holds
+ // the actual length of the output
+ int length = 0;
+
+ // We remember whether the last processed character was
+ // an escape character
+ boolean lastCharWasEscapeChar = false;
+
+ // The multiplier the current unicode digit must be multiplied with.
+ // E. g. the first digit must be multiplied with 16^3, the second with 16^2...
+ int codePointMultiplier = 0;
+
+ // Used to calculate the codepoint of the escaped unicode character
+ int codePoint = 0;
+
+ for (int i = 0; i < input.length(); i++) {
+ char curChar = input.charAt(i);
+ if (codePointMultiplier > 0) {
+ codePoint += hexToInt(curChar) * codePointMultiplier;
+ codePointMultiplier >>>= 4;
+ if (codePointMultiplier == 0) {
+ output[length++] = (char)codePoint;
+ codePoint = 0;
+ }
+ } else if (lastCharWasEscapeChar) {
+ if (curChar == 'u') {
+ // found an escaped unicode character
+ codePointMultiplier = 16 * 16 * 16;
+ } else {
+ // this character was escaped
+ output[length] = curChar;
+ length++;
+ }
+ lastCharWasEscapeChar = false;
+ } else {
+ if (curChar == '\\') {
+ lastCharWasEscapeChar = true;
+ } else {
+ output[length] = curChar;
+ length++;
+ }
+ }
+ }
+
+ if (codePointMultiplier > 0) {
+ throw new ParseException("Truncated unicode escape sequence.");
+ }
+
+ if (lastCharWasEscapeChar) {
+ throw new ParseException("Term can not end with escape character.");
+ }
+
+ return new String(output, 0, length);
+ }
+
+ /**
+  * Returns the numeric value of the hexadecimal character.
+  *
+  * @param c a character in one of the ranges 0-9, a-f or A-F
+  * @return the value of <code>c</code> as a hexadecimal digit, between 0 and 15
+  * @throws ParseException if <code>c</code> is not a hexadecimal digit
+  */
+ private static final int hexToInt(char c) throws ParseException {
+   if ('0' <= c && c <= '9') {
+     return c - '0';
+   } else if ('a' <= c && c <= 'f') {
+     return c - 'a' + 10;
+   } else if ('A' <= c && c <= 'F') {
+     return c - 'A' + 10;
+   } else {
+     // Message previously read "None-hex", which is a typo.
+     throw new ParseException("Non-hex character in unicode escape sequence: " + c);
+   }
+ }
+
+ /**
+  * Returns a String where those characters that QueryParser
+  * expects to be escaped are escaped by a preceding <code>\</code>.
+  *
+  * @param s the text to escape
+  * @return <code>s</code> with a backslash inserted before every query syntax character
+  */
+ public static String escape(String s) {
+   // These characters are part of the query syntax and must be escaped
+   final String syntaxChars = "\\+-!():^[]\"{}~*?|&";
+   final StringBuilder escaped = new StringBuilder();
+   for (final char ch : s.toCharArray()) {
+     if (syntaxChars.indexOf(ch) >= 0) {
+       escaped.append('\\');
+     }
+     escaped.append(ch);
+   }
+   return escaped.toString();
+ }
+
+ /**
+  * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
+  * Parses the first argument with "field" as the default field and prints
+  * the resulting query.
+  * Usage:<br>
+  * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
+  *
+  * @param args command line arguments; <code>args[0]</code> is the query string
+  * @throws Exception any exception raised while parsing is propagated
+  */
+ public static void main(String[] args) throws Exception {
+   if (args.length == 0) {
+     // Name the expected argument; the placeholder was missing from the message.
+     System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
+     System.exit(0);
+   }
+   QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
+       new org.apache.lucene.analysis.SimpleAnalyzer());
+   Query q = qp.parse(args[0]);
+   System.out.println(q.toString("field"));
+ }
+}
+
+PARSER_END(QueryParser)
+
+/* ***************** */
+/* Token Definitions
+ *
+ * The expressions in this first block are all prefixed with '#', which makes
+ * them private helper expressions: they can be referenced from other token
+ * definitions but are never matched as tokens themselves. The leading <*>
+ * makes the block apply in every lexical state.
+ *
+ *   _NUM_CHAR         a decimal digit
+ *   _ESCAPED_CHAR     a backslash followed by any single character
+ *   _TERM_START_CHAR  any character other than the listed whitespace and
+ *                     query syntax characters, or an escaped character
+ *   _TERM_CHAR        a term start character, an escaped character, "-" or "+"
+ *   _WHITESPACE       space, tab, newline, carriage return or U+3000
+ *   _QUOTED_CHAR      any character except a double quote or backslash,
+ *                     or an escaped character
+ */
+/* ***************** */
+
+<*> TOKEN : {
+ <#_NUM_CHAR: ["0"-"9"] >
+// every character that follows a backslash is considered as an escaped character
+| <#_ESCAPED_CHAR: "\\" ~[] >
+| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
+ "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
+ | <_ESCAPED_CHAR> ) >
+| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
+| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
+}
+
+