X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj diff --git a/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj new file mode 100644 index 0000000..908edd0 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -0,0 +1,1440 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(QueryParser) + +package org.apache.lucene.queryParser; + +import java.io.IOException; +import java.io.StringReader; +import java.text.Collator; +import java.text.DateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.DateField; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.util.Version; +import org.apache.lucene.util.VirtualMethod; + +/** + * This class is generated by JavaCC. The most important method is + * {@link #parse(String)}. + * + * The syntax for query strings is as follows: + * A Query is a series of clauses. + * A clause may be prefixed by: + * + * + * A clause may be either: + * + * + * Thus, in BNF, the query grammar is: + *
+ *   Query  ::= ( Clause )*
+ *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
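+ *
+ * For example, a minimal sketch of the grammar in use (the field name
+ * "contents" and the analyzer choice are illustrative, not part of this
+ * class):
+ *
+ *   QueryParser parser = new QueryParser(Version.LUCENE_35, "contents",
+ *       new org.apache.lucene.analysis.SimpleAnalyzer(Version.LUCENE_35));
+ *   Query q1 = parser.parse("+lucene -jakarta");        // required and prohibited terms
+ *   Query q2 = parser.parse("title:(lucene OR solr)");  // field prefix with a nested query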
+ * 

+ * Examples of appropriately formatted queries can be found in the query syntax
+ * documentation.
+ *

+ * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g.
+ * date:[6/1/2005 TO 6/4/2005] produces a range query that searches
+ * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format
+ * of the accepted input depends on {@link #setLocale(Locale) the locale}.
+ * By default a date is converted into a search term using the deprecated
+ * {@link DateField} for compatibility reasons.
+ * To use the new {@link DateTools} to convert dates, a
+ * {@link org.apache.lucene.document.DateTools.Resolution} has to be set.
+ *
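+ * A minimal sketch of that behaviour (the field name "date" and the choice
+ * of Locale.US are illustrative assumptions):
+ *
+ *   QueryParser parser = new QueryParser(Version.LUCENE_35, "date",
+ *       new org.apache.lucene.analysis.SimpleAnalyzer(Version.LUCENE_35));
+ *   parser.setLocale(Locale.US);   // controls how "6/1/2005" is interpreted
+ *   Query q = parser.parse("date:[6/1/2005 TO 6/4/2005]");
+ *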

+ * The date resolution that shall be used for RangeQueries can be set
+ * using {@link #setDateResolution(DateTools.Resolution)}
+ * or {@link #setDateResolution(String, DateTools.Resolution)}. The former
+ * sets the default date resolution for all fields, whereas the latter can
+ * be used to set field-specific date resolutions. Field-specific date
+ * resolutions, if set, take precedence over the default date resolution.
+ *
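+ * Continuing the sketch above (the "modified" field name is an assumption):
+ *
+ *   parser.setDateResolution(DateTools.Resolution.DAY);              // default for all fields
+ *   parser.setDateResolution("modified", DateTools.Resolution.HOUR); // field-specific override
+ *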

+ * If you use neither {@link DateField} nor {@link DateTools} in your
+ * index, you can create your own
+ * query parser that inherits QueryParser and overrides
+ * {@link #getRangeQuery(String, String, String, boolean)} to
+ * use a different method for date conversion.
+ *
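+ * A sketch of such a subclass (the class name and the delegation shown here
+ * are illustrative only; plug in your own date conversion):
+ *
+ *   public class CustomDateQueryParser extends QueryParser {
+ *     public CustomDateQueryParser(Version v, String f, Analyzer a) {
+ *       super(v, f, a);
+ *     }
+ *     protected Query getRangeQuery(String field, String part1, String part2,
+ *                                   boolean inclusive) throws ParseException {
+ *       // convert part1/part2 with your own date handling here, then delegate
+ *       return super.getRangeQuery(field, part1, part2, inclusive);
+ *     }
+ *   }
+ *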

+ * Note that QueryParser is not thread-safe.
+ *

+ * NOTE: there is a new QueryParser in contrib, which matches
+ * the same syntax as this class, but is more modular,
+ * enabling substantial customization to how a query is created.

+ * NOTE: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
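+ *
+ * For example, a sketch of a version-pinned instantiation (LUCENE_35 here
+ * stands in for whichever release you need to stay compatible with):
+ *
+ *   // matchVersion >= 2.9 enables position increments by default;
+ *   // matchVersion >= 3.1 turns off automatic phrase query generation.
+ *   QueryParser qp = new QueryParser(Version.LUCENE_35, "field",
+ *       new org.apache.lucene.analysis.SimpleAnalyzer(Version.LUCENE_35));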

+ */ +public class QueryParser { + + private static final int CONJ_NONE = 0; + private static final int CONJ_AND = 1; + private static final int CONJ_OR = 2; + + private static final int MOD_NONE = 0; + private static final int MOD_NOT = 10; + private static final int MOD_REQ = 11; + + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /** Alternative form of QueryParser.Operator.AND */ + public static final Operator AND_OPERATOR = Operator.AND; + /** Alternative form of QueryParser.Operator.OR */ + public static final Operator OR_OPERATOR = Operator.OR; + + /** The actual operator that parser uses to combine query terms */ + private Operator operator = OR_OPERATOR; + + boolean lowercaseExpandedTerms = true; + MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + boolean allowLeadingWildcard = false; + boolean enablePositionIncrements = true; + + Analyzer analyzer; + String field; + int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + Locale locale = Locale.getDefault(); + + // the default date resolution + DateTools.Resolution dateResolution = null; + // maps field names to date resolutions + Map fieldToDateResolution = null; + + // The collator to use when determining range inclusion, + // for use when constructing RangeQuerys. + Collator rangeCollator = null; + + /** @deprecated remove when getFieldQuery is removed */ + @Deprecated + private static final VirtualMethod getFieldQueryMethod = + new VirtualMethod(QueryParser.class, "getFieldQuery", String.class, String.class); + /** @deprecated remove when getFieldQuery is removed */ + @Deprecated + private static final VirtualMethod getFieldQueryWithQuotedMethod = + new VirtualMethod(QueryParser.class, "getFieldQuery", String.class, String.class, boolean.class); + /** @deprecated remove when getFieldQuery is removed */ + @Deprecated + private final boolean hasNewAPI = + VirtualMethod.compareImplementationDistance(getClass(), + getFieldQueryWithQuotedMethod, getFieldQueryMethod) >= 0; // its ok for both to be overridden + + private boolean autoGeneratePhraseQueries; + + /** The default operator for parsing queries. + * Use {@link QueryParser#setDefaultOperator} to change it. + */ + static public enum Operator { OR, AND } + + /** Constructs a query parser. + * @param matchVersion Lucene version to match. See above. + * @param f the default field for query terms. + * @param a used to find terms in the query text. + */ + public QueryParser(Version matchVersion, String f, Analyzer a) { + this(new FastCharStream(new StringReader(""))); + analyzer = a; + field = f; + if (matchVersion.onOrAfter(Version.LUCENE_29)) { + enablePositionIncrements = true; + } else { + enablePositionIncrements = false; + } + if (matchVersion.onOrAfter(Version.LUCENE_31)) { + setAutoGeneratePhraseQueries(false); + } else { + setAutoGeneratePhraseQueries(true); + } + } + + /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. + * @param query the query string to be parsed. + * @throws ParseException if the parsing fails + */ + public Query parse(String query) throws ParseException { + ReInit(new FastCharStream(new StringReader(query))); + try { + // TopLevelQuery is a Query followed by the end-of-input (EOF) + Query res = TopLevelQuery(field); + return res!=null ? 
res : newBooleanQuery(false); + } + catch (ParseException tme) { + // rethrow to include the original query: + ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); + e.initCause(tme); + throw e; + } + catch (TokenMgrError tme) { + ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage()); + e.initCause(tme); + throw e; + } + catch (BooleanQuery.TooManyClauses tmc) { + ParseException e = new ParseException("Cannot parse '" +query+ "': too many boolean clauses"); + e.initCause(tmc); + throw e; + } + } + + /** + * @return Returns the analyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the field. + */ + public String getField() { + return field; + } + + /** + * @see #setAutoGeneratePhraseQueries(boolean) + */ + public final boolean getAutoGeneratePhraseQueries() { + return autoGeneratePhraseQueries; + } + + /** + * Set to true if phrase queries will be automatically generated + * when the analyzer returns more than one term from whitespace + * delimited text. + * NOTE: this behavior may not be suitable for all languages. + *

+ * Set to false if phrase queries should only be generated when + * surrounded by double quotes. + */ + public final void setAutoGeneratePhraseQueries(boolean value) { + if (value == false && !hasNewAPI) + throw new IllegalArgumentException("You must implement the new API: getFieldQuery(String,String,boolean)" + + " to use setAutoGeneratePhraseQueries(false)"); + this.autoGeneratePhraseQueries = value; + } + + /** + * Get the minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + + /** + * Set the minimum similarity for fuzzy queries. + * Default is 0.5f. + */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + + /** + * Get the prefix length for fuzzy queries. + * @return Returns the fuzzyPrefixLength. + */ + public int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + /** + * Set the prefix length for fuzzy queries. Default is 0. + * @param fuzzyPrefixLength The fuzzyPrefixLength to set. + */ + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + + /** + * Sets the default slop for phrases. If zero, then exact phrase matches + * are required. Default value is zero. + */ + public void setPhraseSlop(int phraseSlop) { + this.phraseSlop = phraseSlop; + } + + /** + * Gets the default slop for phrases. + */ + public int getPhraseSlop() { + return phraseSlop; + } + + + /** + * Set to true to allow leading wildcard characters. + *

+ * When set, * or ? are allowed as
+ * the first character of a PrefixQuery and WildcardQuery.
+ * Note that this can produce very slow
+ * queries on big indexes.
+ *

+ * Default: false. + */ + public void setAllowLeadingWildcard(boolean allowLeadingWildcard) { + this.allowLeadingWildcard = allowLeadingWildcard; + } + + /** + * @see #setAllowLeadingWildcard(boolean) + */ + public boolean getAllowLeadingWildcard() { + return allowLeadingWildcard; + } + + /** + * Set to true to enable position increments in result query. + *

+ * When set, result phrase and multi-phrase queries will
+ * be aware of position increments.
+ * Useful when e.g. a StopFilter increases the position increment of
+ * the token that follows an omitted token.
+ *

+ * Default: false. + */ + public void setEnablePositionIncrements(boolean enable) { + this.enablePositionIncrements = enable; + } + + /** + * @see #setEnablePositionIncrements(boolean) + */ + public boolean getEnablePositionIncrements() { + return enablePositionIncrements; + } + + /** + * Sets the boolean operator of the QueryParser. + * In default mode (OR_OPERATOR) terms without any modifiers + * are considered optional: for example capital of Hungary is equal to + * capital OR of OR Hungary.
+ * In AND_OPERATOR mode terms are considered to be in conjunction: the + * above mentioned query is parsed as capital AND of AND Hungary + */ + public void setDefaultOperator(Operator op) { + this.operator = op; + } + + + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + public Operator getDefaultOperator() { + return operator; + } + + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is true. + */ + public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + public boolean getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; + } + + /** + * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + * a) Runs faster b) Does not have the scarcity of terms unduly influence score + * c) avoids any "TooManyBooleanClauses" exception. + * However, if your application really needs to use the + * old-fashioned BooleanQuery expansion rewriting and the above + * points are not relevant then use this to change + * the rewrite method. + */ + public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { + multiTermRewriteMethod = method; + } + + + /** + * @see #setMultiTermRewriteMethod + */ + public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { + return multiTermRewriteMethod; + } + + /** + * Set locale used by date range parsing. + */ + public void setLocale(Locale locale) { + this.locale = locale; + } + + /** + * Returns current locale, allowing access by subclasses. + */ + public Locale getLocale() { + return locale; + } + + /** + * Sets the default date resolution used by RangeQueries for fields for which no + * specific date resolutions has been set. Field specific resolutions can be set + * with {@link #setDateResolution(String, DateTools.Resolution)}. + * + * @param dateResolution the default date resolution to set + */ + public void setDateResolution(DateTools.Resolution dateResolution) { + this.dateResolution = dateResolution; + } + + /** + * Sets the date resolution used by RangeQueries for a specific field. + * + * @param fieldName field for which the date resolution is to be set + * @param dateResolution date resolution to set + */ + public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { + if (fieldName == null) { + throw new IllegalArgumentException("Field cannot be null."); + } + + if (fieldToDateResolution == null) { + // lazily initialize HashMap + fieldToDateResolution = new HashMap(); + } + + fieldToDateResolution.put(fieldName, dateResolution); + } + + /** + * Returns the date resolution that is used by RangeQueries for the given field. + * Returns null, if no default or field specific date resolution has been set + * for the given field. 
+ * + */ + public DateTools.Resolution getDateResolution(String fieldName) { + if (fieldName == null) { + throw new IllegalArgumentException("Field cannot be null."); + } + + if (fieldToDateResolution == null) { + // no field specific date resolutions set; return default date resolution instead + return this.dateResolution; + } + + DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); + if (resolution == null) { + // no date resolutions set for the given field; return default date resolution instead + resolution = this.dateResolution; + } + + return resolution; + } + + /** + * Sets the collator used to determine index term inclusion in ranges + * for RangeQuerys. + *

+ * WARNING: Setting the rangeCollator to a non-null + * collator using this method will cause every single index Term in the + * Field referenced by lowerTerm and/or upperTerm to be examined. + * Depending on the number of index Terms in this Field, the operation could + * be very slow. + * + * @param rc the collator to use when constructing RangeQuerys + */ + public void setRangeCollator(Collator rc) { + rangeCollator = rc; + } + + /** + * @return the collator used to determine index term inclusion in ranges + * for RangeQuerys. + */ + public Collator getRangeCollator() { + return rangeCollator; + } + + protected void addClause(List clauses, int conj, int mods, Query q) { + boolean required, prohibited; + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + if (clauses.size() > 0 && conj == CONJ_AND) { + BooleanClause c = clauses.get(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.MUST); + } + + if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause c = clauses.get(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.SHOULD); + } + + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) + return; + + if (operator == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. + prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) + clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST)); + else if (!required && !prohibited) + clauses.add(newBooleanClause(q, BooleanClause.Occur.SHOULD)); + else if (!required && prohibited) + clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST_NOT)); + else + throw new RuntimeException("Clause cannot be both required and prohibited"); + } + + /** + * @deprecated Use {@link #getFieldQuery(String,String,boolean)} instead. + */ + @Deprecated + protected Query getFieldQuery(String field, String queryText) throws ParseException { + // treat the text as if it was quoted, to drive phrase logic with old versions. 
+ return getFieldQuery(field, queryText, true); + } + + /** + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + TokenStream source; + try { + source = analyzer.reusableTokenStream(field, new StringReader(queryText)); + source.reset(); + } catch (IOException e) { + source = analyzer.tokenStream(field, new StringReader(queryText)); + } + CachingTokenFilter buffer = new CachingTokenFilter(source); + CharTermAttribute termAtt = null; + PositionIncrementAttribute posIncrAtt = null; + int numTokens = 0; + + boolean success = false; + try { + buffer.reset(); + success = true; + } catch (IOException e) { + // success==false if we hit an exception + } + if (success) { + if (buffer.hasAttribute(CharTermAttribute.class)) { + termAtt = buffer.getAttribute(CharTermAttribute.class); + } + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); + } + } + + int positionCount = 0; + boolean severalTokensAtSamePosition = false; + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore + } + } + try { + // rewind the buffer stream + buffer.reset(); + + // close original stream - all tokens buffered + source.close(); + } + catch (IOException e) { + // ignore + } + + if (numTokens == 0) + return null; + else if (numTokens == 1) { + String term = null; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + return newTermQuery(new Term(field, term)); + } else { + if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) { + if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) { + // no phrase query: + BooleanQuery q = newBooleanQuery(positionCount == 1); + + BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? 
+ BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; + + for (int i = 0; i < numTokens; i++) { + String term = null; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + Query currentQuery = newTermQuery( + new Term(field, term)); + q.add(currentQuery, occur); + } + return q; + } + else { + // phrase query: + MultiPhraseQuery mpq = newMultiPhraseQuery(); + mpq.setSlop(phraseSlop); + List multiTerms = new ArrayList(); + int position = -1; + for (int i = 0; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[0]),position); + } else { + mpq.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, term)); + } + if (enablePositionIncrements) { + mpq.add(multiTerms.toArray(new Term[0]),position); + } else { + mpq.add(multiTerms.toArray(new Term[0])); + } + return mpq; + } + } + else { + PhraseQuery pq = newPhraseQuery(); + pq.setSlop(phraseSlop); + int position = -1; + + + for (int i = 0; i < numTokens; i++) { + String term = null; + int positionIncrement = 1; + + try { + boolean hasNext = buffer.incrementToken(); + assert hasNext == true; + term = termAtt.toString(); + if (posIncrAtt != null) { + positionIncrement = posIncrAtt.getPositionIncrement(); + } + } catch (IOException e) { + // safe to ignore, because we know the number of tokens + } + + if (enablePositionIncrements) { + position += positionIncrement; + pq.add(new Term(field, term),position); + } else { + pq.add(new Term(field, term)); + } + } + return pq; + } + } + } + + + + /** + * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. + * This method may be overridden, for example, to return + * a SpanNearQuery instead of a PhraseQuery. + * + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText, int slop) + throws ParseException { + Query query = hasNewAPI ? 
getFieldQuery(field, queryText, true) : getFieldQuery(field, queryText); + + if (query instanceof PhraseQuery) { + ((PhraseQuery) query).setSlop(slop); + } + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } + + return query; + } + + + /** + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRangeQuery(String field, + String part1, + String part2, + boolean inclusive) throws ParseException + { + if (lowercaseExpandedTerms) { + part1 = part1.toLowerCase(); + part2 = part2.toLowerCase(); + } + try { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); + df.setLenient(true); + Date d1 = df.parse(part1); + Date d2 = df.parse(part2); + if (inclusive) { + // The user can only specify the date, not the time, so make sure + // the time is set to the latest possible time of that date to really + // include all documents: + Calendar cal = Calendar.getInstance(locale); + cal.setTime(d2); + cal.set(Calendar.HOUR_OF_DAY, 23); + cal.set(Calendar.MINUTE, 59); + cal.set(Calendar.SECOND, 59); + cal.set(Calendar.MILLISECOND, 999); + d2 = cal.getTime(); + } + DateTools.Resolution resolution = getDateResolution(field); + if (resolution == null) { + // no default or field specific date resolution has been set, + // use deprecated DateField to maintain compatibility with + // pre-1.9 Lucene versions. + part1 = DateField.dateToString(d1); + part2 = DateField.dateToString(d2); + } else { + part1 = DateTools.dateToString(d1, resolution); + part2 = DateTools.dateToString(d2, resolution); + } + } + catch (Exception e) { } + + return newRangeQuery(field, part1, part2, inclusive); + } + + /** + * Builds a new BooleanQuery instance + * @param disableCoord disable coord + * @return new BooleanQuery instance + */ + protected BooleanQuery newBooleanQuery(boolean disableCoord) { + return new BooleanQuery(disableCoord); + } + + /** + * Builds a new BooleanClause instance + * @param q sub query + * @param occur how this clause should occur when matching documents + * @return new BooleanClause instance + */ + protected BooleanClause newBooleanClause(Query q, BooleanClause.Occur occur) { + return new BooleanClause(q, occur); + } + + /** + * Builds a new TermQuery instance + * @param term term + * @return new TermQuery instance + */ + protected Query newTermQuery(Term term){ + return new TermQuery(term); + } + + /** + * Builds a new PhraseQuery instance + * @return new PhraseQuery instance + */ + protected PhraseQuery newPhraseQuery(){ + return new PhraseQuery(); + } + + /** + * Builds a new MultiPhraseQuery instance + * @return new MultiPhraseQuery instance + */ + protected MultiPhraseQuery newMultiPhraseQuery(){ + return new MultiPhraseQuery(); + } + + /** + * Builds a new PrefixQuery instance + * @param prefix Prefix term + * @return new PrefixQuery instance + */ + protected Query newPrefixQuery(Term prefix){ + PrefixQuery query = new PrefixQuery(prefix); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Builds a new FuzzyQuery instance + * @param term Term + * @param minimumSimilarity minimum similarity + * @param prefixLength prefix length + * @return new FuzzyQuery Instance + */ + protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { + // FuzzyQuery doesn't yet allow constant score rewrite + return new FuzzyQuery(term,minimumSimilarity,prefixLength); + } + + /** + * Builds a new TermRangeQuery instance + * @param field Field + * @param part1 min + * @param 
part2 max + * @param inclusive true if range is inclusive + * @return new TermRangeQuery instance + */ + protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) { + final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Builds a new MatchAllDocsQuery instance + * @return new MatchAllDocsQuery instance + */ + protected Query newMatchAllDocsQuery() { + return new MatchAllDocsQuery(); + } + + /** + * Builds a new WildcardQuery instance + * @param t wildcard term + * @return new WildcardQuery instance + */ + protected Query newWildcardQuery(Term t) { + WildcardQuery query = new WildcardQuery(t); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses List that contains {@link BooleanClause} instances + * to join. + * + * @return Resulting {@link Query} object. + * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(List clauses) throws ParseException { + return getBooleanQuery(clauses, false); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses List that contains {@link BooleanClause} instances + * to join. + * @param disableCoord true if coord scoring should be disabled. + * + * @return Resulting {@link Query} object. + * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(List clauses, boolean disableCoord) + throws ParseException + { + if (clauses.size()==0) { + return null; // all clause words were filtered away by the analyzer. + } + BooleanQuery query = newBooleanQuery(disableCoord); + for(final BooleanClause clause: clauses) { + query.add(clause); + } + return query; + } + + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains one or more wildcard + * characters (? and *), but is not a prefix term token (one + * that has just a single * character at the end) + *

+ * Depending on settings, prefix term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with wildcard templates.
+ *

+ * Can be overridden by extending classes, to provide custom handling for + * wildcard queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains one or more wild card + * characters (? or *), but is not simple prefix term + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getWildcardQuery(String field, String termStr) throws ParseException + { + if ("*".equals(field)) { + if ("*".equals(termStr)) return newMatchAllDocsQuery(); + } + if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"))) + throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newWildcardQuery(t); + } + + /** + * Factory method for generating a query (similar to + * {@link #getWildcardQuery}). Called when parser parses an input term + * token that uses prefix notation; that is, contains a single '*' wildcard + * character as its last character. Since this is a special case + * of generic wildcard term, and such a query can be optimized easily, + * this usually results in a different query object. + *

+ * Depending on settings, a prefix term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with wildcard templates.
+ *

+ * Can be overridden by extending classes, to provide custom handling for + * wild card queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * (without trailing '*' character!) + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getPrefixQuery(String field, String termStr) throws ParseException + { + if (!allowLeadingWildcard && termStr.startsWith("*")) + throw new ParseException("'*' not allowed as first character in PrefixQuery"); + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newPrefixQuery(t); + } + + /** + * Factory method for generating a query (similar to + * {@link #getWildcardQuery}). Called when parser parses + * an input term token that has the fuzzy suffix (~) appended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength); + } + + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + * + * Supports escaped unicode characters, e. g. translates + * \\u0041 to A. + * + */ + private String discardEscapeChar(String input) throws ParseException { + // Create char array to hold unescaped char sequence + char[] output = new char[input.length()]; + + // The length of the output can be less than the input + // due to discarded escape chars. This variable holds + // the actual length of the output + int length = 0; + + // We remember whether the last processed character was + // an escape character + boolean lastCharWasEscapeChar = false; + + // The multiplier the current unicode digit must be multiplied with. + // E. g. the first digit must be multiplied with 16^3, the second with 16^2... 
+ int codePointMultiplier = 0; + + // Used to calculate the codepoint of the escaped unicode character + int codePoint = 0; + + for (int i = 0; i < input.length(); i++) { + char curChar = input.charAt(i); + if (codePointMultiplier > 0) { + codePoint += hexToInt(curChar) * codePointMultiplier; + codePointMultiplier >>>= 4; + if (codePointMultiplier == 0) { + output[length++] = (char)codePoint; + codePoint = 0; + } + } else if (lastCharWasEscapeChar) { + if (curChar == 'u') { + // found an escaped unicode character + codePointMultiplier = 16 * 16 * 16; + } else { + // this character was escaped + output[length] = curChar; + length++; + } + lastCharWasEscapeChar = false; + } else { + if (curChar == '\\') { + lastCharWasEscapeChar = true; + } else { + output[length] = curChar; + length++; + } + } + } + + if (codePointMultiplier > 0) { + throw new ParseException("Truncated unicode escape sequence."); + } + + if (lastCharWasEscapeChar) { + throw new ParseException("Term can not end with escape character."); + } + + return new String(output, 0, length); + } + + /** Returns the numeric value of the hexadecimal character */ + private static final int hexToInt(char c) throws ParseException { + if ('0' <= c && c <= '9') { + return c - '0'; + } else if ('a' <= c && c <= 'f'){ + return c - 'a' + 10; + } else if ('A' <= c && c <= 'F') { + return c - 'A' + 10; + } else { + throw new ParseException("None-hex character in unicode escape sequence: " + c); + } + } + + /** + * Returns a String where those characters that QueryParser + * expects to be escaped are escaped by a preceding \. + */ + public static String escape(String s) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + // These characters are part of the query syntax and must be escaped + if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?' || c == '|' || c == '&') { + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } + + /** + * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}. + * Usage:
+ * java org.apache.lucene.queryParser.QueryParser <input> + */ + public static void main(String[] args) throws Exception { + if (args.length == 0) { + System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); + System.exit(0); + } + QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", + new org.apache.lucene.analysis.SimpleAnalyzer()); + Query q = qp.parse(args[0]); + System.out.println(q.toString("field")); + } +} + +PARSER_END(QueryParser) + +/* ***************** */ +/* Token Definitions */ +/* ***************** */ + +<*> TOKEN : { + <#_NUM_CHAR: ["0"-"9"] > +// every character that follows a backslash is considered as an escaped character +| <#_ESCAPED_CHAR: "\\" ~[] > +| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ] + | <_ESCAPED_CHAR> ) > +| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > +| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > +| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > +} + + SKIP : { + < <_WHITESPACE>> +} + + TOKEN : { + +| +| +| +| +| +| +| +| +| : Boost +| )* "\""> +| (<_TERM_CHAR>)* > +| )+ ( "." (<_NUM_CHAR>)+ )? )? > +| (<_TERM_CHAR>)* "*" ) > +| | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +| : RangeIn +| : RangeEx +} + + TOKEN : { +)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT +} + + TOKEN : { + +| : DEFAULT +| +| +} + + TOKEN : { + +| : DEFAULT +| +| +} + +// * Query ::= ( Clause )* +// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) + +int Conjunction() : { + int ret = CONJ_NONE; +} +{ + [ + { ret = CONJ_AND; } + | { ret = CONJ_OR; } + ] + { return ret; } +} + +int Modifiers() : { + int ret = MOD_NONE; +} +{ + [ + { ret = MOD_REQ; } + | { ret = MOD_NOT; } + | { ret = MOD_NOT; } + ] + { return ret; } +} + +// This makes sure that there is no garbage after the query string +Query TopLevelQuery(String field) : +{ + Query q; +} +{ + q=Query(field) + { + return q; + } +} + +Query Query(String field) : +{ + List clauses = new ArrayList(); + Query q, firstQuery=null; + int conj, mods; +} +{ + mods=Modifiers() q=Clause(field) + { + addClause(clauses, CONJ_NONE, mods, q); + if (mods == MOD_NONE) + firstQuery=q; + } + ( + conj=Conjunction() mods=Modifiers() q=Clause(field) + { addClause(clauses, conj, mods, q); } + )* + { + if (clauses.size() == 1 && firstQuery != null) + return firstQuery; + else { + return getBooleanQuery(clauses); + } + } +} + +Query Clause(String field) : { + Query q; + Token fieldToken=null, boost=null; +} +{ + [ + LOOKAHEAD(2) + ( + fieldToken= {field=discardEscapeChar(fieldToken.image);} + | {field="*";} + ) + ] + + ( + q=Term(field) + | q=Query(field) ( boost=)? 
+ + ) + { + if (boost != null) { + float f = (float)1.0; + try { + f = Float.valueOf(boost.image).floatValue(); + q.setBoost(f); + } catch (Exception ignored) { } + } + return q; + } +} + + +Query Term(String field) : { + Token term, boost=null, fuzzySlop=null, goop1, goop2; + boolean prefix = false; + boolean wildcard = false; + boolean fuzzy = false; + Query q; +} +{ + ( + ( + term= + | term= { wildcard=true; } + | term= { prefix=true; } + | term= { wildcard=true; } + | term= + ) + [ fuzzySlop= { fuzzy=true; } ] + [ boost= [ fuzzySlop= { fuzzy=true; } ] ] + { + String termImage=discardEscapeChar(term.image); + if (wildcard) { + q = getWildcardQuery(field, termImage); + } else if (prefix) { + q = getPrefixQuery(field, + discardEscapeChar(term.image.substring + (0, term.image.length()-1))); + } else if (fuzzy) { + float fms = fuzzyMinSim; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + if(fms < 0.0f || fms > 1.0f){ + throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); + } + q = getFuzzyQuery(field, termImage,fms); + } else { + q = hasNewAPI ? getFieldQuery(field, termImage, false) : getFieldQuery(field, termImage); + } + } + | ( ( goop1=|goop1= ) + [ ] ( goop2=|goop2= ) + ) + [ boost= ] + { + if (goop1.kind == RANGEIN_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + } + if (goop2.kind == RANGEIN_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } + q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), true); + } + | ( ( goop1=|goop1= ) + [ ] ( goop2=|goop2= ) + ) + [ boost= ] + { + if (goop1.kind == RANGEEX_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + } + if (goop2.kind == RANGEEX_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } + + q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), false); + } + | term= + [ fuzzySlop= ] + [ boost= ] + { + int s = phraseSlop; + + if (fuzzySlop != null) { + try { + s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); + } + catch (Exception ignored) { } + } + q = getFieldQuery(field, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s); + } + ) + { + if (boost != null) { + float f = (float) 1.0; + try { + f = Float.valueOf(boost.image).floatValue(); + } + catch (Exception ignored) { + /* Should this be handled somehow? (defaults to "no boost", if + * boost number is invalid) + */ + } + + // avoid boosting null queries, such as those caused by stop words + if (q != null) { + q.setBoost(f); + } + } + return q; + } +}