lucene-java-3.5.0/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj

   1 /**
   2  * Licensed to the Apache Software Foundation (ASF) under one or more
   3  * contributor license agreements.  See the NOTICE file distributed with
   4  * this work for additional information regarding copyright ownership.
   5  * The ASF licenses this file to You under the Apache License, Version 2.0
   6  * (the "License"); you may not use this file except in compliance with
   7  * the License.  You may obtain a copy of the License at
   8  *
   9  *     http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing, software
  12  * distributed under the License is distributed on an "AS IS" BASIS,
  13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14  * See the License for the specific language governing permissions and
  15  * limitations under the License.
  16  */
  17
  18 options {
  19   STATIC=false;
  20   JAVA_UNICODE_ESCAPE=true;
  21   USER_CHAR_STREAM=true;
  22 }
  23
  24 PARSER_BEGIN(QueryParser)
  25
  26 package org.apache.lucene.queryParser;
  27
  28 import java.io.IOException;
  29 import java.io.StringReader;
  30 import java.text.Collator;
  31 import java.text.DateFormat;
  32 import java.util.ArrayList;
  33 import java.util.Calendar;
  34 import java.util.Date;
  35 import java.util.HashMap;
  36 import java.util.List;
  37 import java.util.Locale;
  38 import java.util.Map;
  39
  40 import org.apache.lucene.analysis.Analyzer;
  41 import org.apache.lucene.analysis.CachingTokenFilter;
  42 import org.apache.lucene.analysis.TokenStream;
  43 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  44 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  45 import org.apache.lucene.document.DateField;
  46 import org.apache.lucene.document.DateTools;
  47 import org.apache.lucene.index.Term;
  48 import org.apache.lucene.search.BooleanClause;
  49 import org.apache.lucene.search.BooleanQuery;
  50 import org.apache.lucene.search.FuzzyQuery;
  51 import org.apache.lucene.search.MultiTermQuery;
  52 import org.apache.lucene.search.MatchAllDocsQuery;
  53 import org.apache.lucene.search.MultiPhraseQuery;
  54 import org.apache.lucene.search.PhraseQuery;
  55 import org.apache.lucene.search.PrefixQuery;
  56 import org.apache.lucene.search.Query;
  57 import org.apache.lucene.search.TermRangeQuery;
  58 import org.apache.lucene.search.TermQuery;
  59 import org.apache.lucene.search.WildcardQuery;
  60 import org.apache.lucene.util.Version;
  61 import org.apache.lucene.util.VirtualMethod;
  62
  63 /**
  64  * This class is generated by JavaCC.  The most important method is
  65  * {@link #parse(String)}.
  66  *
  67  * The syntax for query strings is as follows:
  68  * A Query is a series of clauses.
  69  * A clause may be prefixed by:
  70  * <ul>
  71  * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
  72  * that the clause is required or prohibited respectively; or
  73  * <li> a term followed by a colon, indicating the field to be searched.
  74  * This enables one to construct queries which search multiple fields.
  75  * </ul>
  76  *
  77  * A clause may be either:
  78  * <ul>
  79  * <li> a term, indicating all the documents that contain this term; or
  80  * <li> a nested query, enclosed in parentheses.  Note that this may be used
  81  * with a <code>+</code>/<code>-</code> prefix to require any of a set of
  82  * terms.
  83  * </ul>
  84  *
  85  * Thus, in BNF, the query grammar is:
  86  * <pre>
  87  *   Query  ::= ( Clause )*
  88  *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
  89  * </pre>
  90  *
  91  * <p>
  92  * Examples of appropriately formatted queries can be found in the <a
  93  * href="../../../../../../queryparsersyntax.html">query syntax
  94  * documentation</a>.
  95  * </p>
  96  *
  97  * <p>
  98  * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g.
  99  * <tt>date:[6/1/2005 TO 6/4/2005]</tt> produces a range query that searches
 100  * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format
 101  * of the accepted input depends on {@link #setLocale(Locale) the locale}.
 102  * By default a date is converted into a search term using the deprecated
 103  * {@link DateField} for compatibility reasons.
 104  * To use the new {@link DateTools} to convert dates, a
 105  * {@link org.apache.lucene.document.DateTools.Resolution} has to be set.
 106  * </p>
 107  * <p>
 108  * The date resolution that shall be used for RangeQueries can be set
 109  * using {@link #setDateResolution(DateTools.Resolution)}
 110  * or {@link #setDateResolution(String, DateTools.Resolution)}. The former
 111  * sets the default date resolution for all fields, whereas the latter can
 112  * be used to set field specific date resolutions. Field specific date
 113  * resolutions take, if set, precedence over the default date resolution.
 114  * </p>
 115  * <p>
 116  * If you use neither {@link DateField} nor {@link DateTools} in your
 117  * index, you can create your own
  118  * query parser that extends QueryParser and overrides
 119  * {@link #getRangeQuery(String, String, String, boolean)} to
 120  * use a different method for date conversion.
 121  * </p>
 122  *
 123  * <p>Note that QueryParser is <em>not</em> thread-safe.</p>
 124  *
 125  * <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches
 126  * the same syntax as this class, but is more modular,
 127  * enabling substantial customization to how a query is created.
 128  *
 129  * <a name="version"/>
 130  * <p><b>NOTE</b>: You must specify the required {@link Version}
 131  * compatibility when creating QueryParser:
 132  * <ul>
 133  *    <li> As of 2.9, {@link #setEnablePositionIncrements} is true by
 134  *         default.
 135  *    <li> As of 3.1, {@link #setAutoGeneratePhraseQueries} is false by
 136  *         default.
 137  * </ul>
 138  */
 139 public class QueryParser {
 140
  141   private static final int CONJ_NONE   = 0;   // presumably: no explicit conjunction (not referenced in the visible code)
  142   private static final int CONJ_AND    = 1;   // clause introduced by AND (see addClause)
  143   private static final int CONJ_OR     = 2;   // clause introduced by OR (see addClause)
  144
  145   private static final int MOD_NONE    = 0;   // presumably: no modifier (not referenced in the visible code)
  146   private static final int MOD_NOT     = 10;  // clause introduced by NOT or '-': prohibited (see addClause)
  147   private static final int MOD_REQ     = 11;  // clause introduced by '+': required (see addClause)
  148
  149   // Aliases that make it possible to call setDefaultOperator() without
  150   // naming the nested Operator enum:
  151   /** Alternative form of QueryParser.Operator.AND */
  152   public static final Operator AND_OPERATOR = Operator.AND;
  153   /** Alternative form of QueryParser.Operator.OR */
  154   public static final Operator OR_OPERATOR = Operator.OR;
  155
  156   /** The operator the parser uses to combine query terms; see setDefaultOperator. */
  157   private Operator operator = OR_OPERATOR;
  158
  159   boolean lowercaseExpandedTerms = true;   // see setLowercaseExpandedTerms
  160   MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;   // see setMultiTermRewriteMethod
  161   boolean allowLeadingWildcard = false;    // see setAllowLeadingWildcard
  162   boolean enablePositionIncrements = true; // overwritten by the (Version, String, Analyzer) constructor
  163
  164   Analyzer analyzer;                       // assigned by the constructor; used by getFieldQuery to tokenize query text
  165   String field;                            // default field handed to TopLevelQuery by parse()
  166   int phraseSlop = 0;                      // slop applied to generated phrase queries
  167   float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  168   int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
  169   Locale locale = Locale.getDefault();     // used by getRangeQuery for date parsing
  170
  171   // the default date resolution; null means none has been set
  172   DateTools.Resolution dateResolution = null;
  173   // maps field names to date resolutions; created lazily by setDateResolution(String, ...)
  174   Map<String,DateTools.Resolution> fieldToDateResolution = null;
  175
  176   // The collator to use when determining range inclusion,
  177   // for use when constructing RangeQuerys.
  178   Collator rangeCollator = null;
  179
  180   /** @deprecated remove when getFieldQuery is removed */
  181   @Deprecated
  182   private static final VirtualMethod<QueryParser> getFieldQueryMethod =
  183     new VirtualMethod<QueryParser>(QueryParser.class, "getFieldQuery", String.class, String.class);
  184   /** @deprecated remove when getFieldQuery is removed */
  185   @Deprecated
  186   private static final VirtualMethod<QueryParser> getFieldQueryWithQuotedMethod =
  187     new VirtualMethod<QueryParser>(QueryParser.class, "getFieldQuery", String.class, String.class, boolean.class);
  188   /** @deprecated remove when getFieldQuery is removed */
  189   @Deprecated
  190   private final boolean hasNewAPI =
  191     VirtualMethod.compareImplementationDistance(getClass(),
  192         getFieldQueryWithQuotedMethod, getFieldQueryMethod) >= 0; // it's OK for both to be overridden
  193
  194   private boolean autoGeneratePhraseQueries; // assigned through setAutoGeneratePhraseQueries
 195
 196   /** The default operator for parsing queries.
 197    * Use {@link QueryParser#setDefaultOperator} to change it.
 198    */
 199   static public enum Operator { OR, AND }
 200
 201   /** Constructs a query parser.
 202    *  @param matchVersion  Lucene version to match. See <a href="#version">above</a>.
 203    *  @param f  the default field for query terms.
 204    *  @param a   used to find terms in the query text.
 205    */
 206   public QueryParser(Version matchVersion, String f, Analyzer a) {
 207     this(new FastCharStream(new StringReader("")));
 208     analyzer = a;
 209     field = f;
 210     if (matchVersion.onOrAfter(Version.LUCENE_29)) {
 211       enablePositionIncrements = true;
 212     } else {
 213       enablePositionIncrements = false;
 214     }
 215     if (matchVersion.onOrAfter(Version.LUCENE_31)) {
 216       setAutoGeneratePhraseQueries(false);
 217     } else {
 218       setAutoGeneratePhraseQueries(true);
 219     }
 220   }
 221
 222   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
 223    *  @param query  the query string to be parsed.
 224    *  @throws ParseException if the parsing fails
 225    */
 226   public Query parse(String query) throws ParseException {
 227     ReInit(new FastCharStream(new StringReader(query)));
 228     try {
 229       // TopLevelQuery is a Query followed by the end-of-input (EOF)
 230       Query res = TopLevelQuery(field);
 231       return res!=null ? res : newBooleanQuery(false);
 232     }
 233     catch (ParseException tme) {
 234       // rethrow to include the original query:
 235       ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage());
 236       e.initCause(tme);
 237       throw e;
 238     }
 239     catch (TokenMgrError tme) {
 240       ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage());
 241       e.initCause(tme);
 242       throw e;
 243     }
 244     catch (BooleanQuery.TooManyClauses tmc) {
 245       ParseException e = new ParseException("Cannot parse '" +query+ "': too many boolean clauses");
 246       e.initCause(tmc);
 247       throw e;
 248     }
 249   }
 250
 251    /**
 252    * @return Returns the analyzer.
 253    */
 254   public Analyzer getAnalyzer() {
 255     return analyzer;
 256   }
 257
 258   /**
 259    * @return Returns the field.
 260    */
 261   public String getField() {
 262     return field;
 263   }
 264
 265   /**
 266    * @see #setAutoGeneratePhraseQueries(boolean)
 267    */
 268   public final boolean getAutoGeneratePhraseQueries() {
 269     return autoGeneratePhraseQueries;
 270   }
 271
 272   /**
 273    * Set to true if phrase queries will be automatically generated
 274    * when the analyzer returns more than one term from whitespace
 275    * delimited text.
 276    * NOTE: this behavior may not be suitable for all languages.
 277    * <p>
 278    * Set to false if phrase queries should only be generated when
 279    * surrounded by double quotes.
 280    */
 281   public final void setAutoGeneratePhraseQueries(boolean value) {
 282     if (value == false && !hasNewAPI)
 283       throw new IllegalArgumentException("You must implement the new API: getFieldQuery(String,String,boolean)"
 284        + " to use setAutoGeneratePhraseQueries(false)");
 285     this.autoGeneratePhraseQueries = value;
 286   }
 287
 288    /**
 289    * Get the minimal similarity for fuzzy queries.
 290    */
 291   public float getFuzzyMinSim() {
 292       return fuzzyMinSim;
 293   }
 294
 295   /**
 296    * Set the minimum similarity for fuzzy queries.
 297    * Default is 0.5f.
 298    */
 299   public void setFuzzyMinSim(float fuzzyMinSim) {
 300       this.fuzzyMinSim = fuzzyMinSim;
 301   }
 302
 303    /**
 304    * Get the prefix length for fuzzy queries.
 305    * @return Returns the fuzzyPrefixLength.
 306    */
 307   public int getFuzzyPrefixLength() {
 308     return fuzzyPrefixLength;
 309   }
 310
 311   /**
 312    * Set the prefix length for fuzzy queries. Default is 0.
 313    * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
 314    */
 315   public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
 316     this.fuzzyPrefixLength = fuzzyPrefixLength;
 317   }
 318
 319   /**
 320    * Sets the default slop for phrases.  If zero, then exact phrase matches
 321    * are required.  Default value is zero.
 322    */
 323   public void setPhraseSlop(int phraseSlop) {
 324     this.phraseSlop = phraseSlop;
 325   }
 326
 327   /**
 328    * Gets the default slop for phrases.
 329    */
 330   public int getPhraseSlop() {
 331     return phraseSlop;
 332   }
 333
 334
 335   /**
 336    * Set to <code>true</code> to allow leading wildcard characters.
 337    * <p>
 338    * When set, <code>*</code> or <code>?</code> are allowed as
 339    * the first character of a PrefixQuery and WildcardQuery.
 340    * Note that this can produce very slow
 341    * queries on big indexes.
 342    * <p>
 343    * Default: false.
 344    */
 345   public void setAllowLeadingWildcard(boolean allowLeadingWildcard) {
 346     this.allowLeadingWildcard = allowLeadingWildcard;
 347   }
 348
 349   /**
 350    * @see #setAllowLeadingWildcard(boolean)
 351    */
 352   public boolean getAllowLeadingWildcard() {
 353     return allowLeadingWildcard;
 354   }
 355
 356   /**
 357    * Set to <code>true</code> to enable position increments in result query.
 358    * <p>
 359    * When set, result phrase and multi-phrase queries will
 360    * be aware of position increments.
 361    * Useful when e.g. a StopFilter increases the position increment of
 362    * the token that follows an omitted token.
 363    * <p>
 364    * Default: false.
 365    */
 366   public void setEnablePositionIncrements(boolean enable) {
 367     this.enablePositionIncrements = enable;
 368   }
 369
 370   /**
 371    * @see #setEnablePositionIncrements(boolean)
 372    */
 373   public boolean getEnablePositionIncrements() {
 374     return enablePositionIncrements;
 375   }
 376
 377   /**
 378    * Sets the boolean operator of the QueryParser.
 379    * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
 380    * are considered optional: for example <code>capital of Hungary</code> is equal to
 381    * <code>capital OR of OR Hungary</code>.<br/>
 382    * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
 383    * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
 384    */
 385   public void setDefaultOperator(Operator op) {
 386     this.operator = op;
 387   }
 388
 389
 390   /**
 391    * Gets implicit operator setting, which will be either AND_OPERATOR
 392    * or OR_OPERATOR.
 393    */
 394   public Operator getDefaultOperator() {
 395     return operator;
 396   }
 397
 398
 399   /**
 400    * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
 401    * lower-cased or not.  Default is <code>true</code>.
 402    */
 403   public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
 404     this.lowercaseExpandedTerms = lowercaseExpandedTerms;
 405   }
 406
 407
 408   /**
 409    * @see #setLowercaseExpandedTerms(boolean)
 410    */
 411   public boolean getLowercaseExpandedTerms() {
 412     return lowercaseExpandedTerms;
 413   }
 414
 415   /**
 416    * By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
 417    * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
 418    * a) Runs faster b) Does not have the scarcity of terms unduly influence score
 419    * c) avoids any "TooManyBooleanClauses" exception.
 420    * However, if your application really needs to use the
 421    * old-fashioned BooleanQuery expansion rewriting and the above
 422    * points are not relevant then use this to change
 423    * the rewrite method.
 424    */
 425   public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
 426     multiTermRewriteMethod = method;
 427   }
 428
 429
 430   /**
 431    * @see #setMultiTermRewriteMethod
 432    */
 433   public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
 434     return multiTermRewriteMethod;
 435   }
 436
 437   /**
 438    * Set locale used by date range parsing.
 439    */
 440   public void setLocale(Locale locale) {
 441     this.locale = locale;
 442   }
 443
 444   /**
 445    * Returns current locale, allowing access by subclasses.
 446    */
 447   public Locale getLocale() {
 448     return locale;
 449   }
 450
 451   /**
 452    * Sets the default date resolution used by RangeQueries for fields for which no
 453    * specific date resolutions has been set. Field specific resolutions can be set
 454    * with {@link #setDateResolution(String, DateTools.Resolution)}.
 455    *
 456    * @param dateResolution the default date resolution to set
 457    */
 458   public void setDateResolution(DateTools.Resolution dateResolution) {
 459     this.dateResolution = dateResolution;
 460   }
 461
 462   /**
 463    * Sets the date resolution used by RangeQueries for a specific field.
 464    *
 465    * @param fieldName field for which the date resolution is to be set
 466    * @param dateResolution date resolution to set
 467    */
 468   public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) {
 469     if (fieldName == null) {
 470       throw new IllegalArgumentException("Field cannot be null.");
 471     }
 472
 473     if (fieldToDateResolution == null) {
 474       // lazily initialize HashMap
 475       fieldToDateResolution = new HashMap<String,DateTools.Resolution>();
 476     }
 477
 478     fieldToDateResolution.put(fieldName, dateResolution);
 479   }
 480
 481   /**
 482    * Returns the date resolution that is used by RangeQueries for the given field.
 483    * Returns null, if no default or field specific date resolution has been set
 484    * for the given field.
 485    *
 486    */
 487   public DateTools.Resolution getDateResolution(String fieldName) {
 488     if (fieldName == null) {
 489       throw new IllegalArgumentException("Field cannot be null.");
 490     }
 491
 492     if (fieldToDateResolution == null) {
 493       // no field specific date resolutions set; return default date resolution instead
 494       return this.dateResolution;
 495     }
 496
 497     DateTools.Resolution resolution = fieldToDateResolution.get(fieldName);
 498     if (resolution == null) {
 499       // no date resolutions set for the given field; return default date resolution instead
 500       resolution = this.dateResolution;
 501     }
 502
 503     return resolution;
 504   }
 505
 506   /**
 507    * Sets the collator used to determine index term inclusion in ranges
 508    * for RangeQuerys.
 509    * <p/>
 510    * <strong>WARNING:</strong> Setting the rangeCollator to a non-null
 511    * collator using this method will cause every single index Term in the
 512    * Field referenced by lowerTerm and/or upperTerm to be examined.
 513    * Depending on the number of index Terms in this Field, the operation could
 514    * be very slow.
 515    *
 516    *  @param rc  the collator to use when constructing RangeQuerys
 517    */
 518   public void setRangeCollator(Collator rc) {
 519     rangeCollator = rc;
 520   }
 521
 522   /**
 523    * @return the collator used to determine index term inclusion in ranges
 524    * for RangeQuerys.
 525    */
 526   public Collator getRangeCollator() {
 527     return rangeCollator;
 528   }
 529
 530   protected void addClause(List<BooleanClause> clauses, int conj, int mods, Query q) {
 531     boolean required, prohibited;
 532
 533     // If this term is introduced by AND, make the preceding term required,
 534     // unless it's already prohibited
 535     if (clauses.size() > 0 && conj == CONJ_AND) {
 536       BooleanClause c = clauses.get(clauses.size()-1);
 537       if (!c.isProhibited())
 538         c.setOccur(BooleanClause.Occur.MUST);
 539     }
 540
 541     if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
 542       // If this term is introduced by OR, make the preceding term optional,
 543       // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
 544       // notice if the input is a OR b, first term is parsed as required; without
 545       // this modification a OR b would parsed as +a OR b
 546       BooleanClause c = clauses.get(clauses.size()-1);
 547       if (!c.isProhibited())
 548         c.setOccur(BooleanClause.Occur.SHOULD);
 549     }
 550
 551     // We might have been passed a null query; the term might have been
 552     // filtered away by the analyzer.
 553     if (q == null)
 554       return;
 555
 556     if (operator == OR_OPERATOR) {
 557       // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
 558       // introduced by NOT or -; make sure not to set both.
 559       prohibited = (mods == MOD_NOT);
 560       required = (mods == MOD_REQ);
 561       if (conj == CONJ_AND && !prohibited) {
 562         required = true;
 563       }
 564     } else {
 565       // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
 566       // if not PROHIBITED and not introduced by OR
 567       prohibited = (mods == MOD_NOT);
 568       required   = (!prohibited && conj != CONJ_OR);
 569     }
 570     if (required && !prohibited)
 571       clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST));
 572     else if (!required && !prohibited)
 573       clauses.add(newBooleanClause(q, BooleanClause.Occur.SHOULD));
 574     else if (!required && prohibited)
 575       clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST_NOT));
 576     else
 577       throw new RuntimeException("Clause cannot be both required and prohibited");
 578   }
 579
 580   /**
 581    * @deprecated Use {@link #getFieldQuery(String,String,boolean)} instead.
 582    */
 583   @Deprecated
 584   protected Query getFieldQuery(String field, String queryText) throws ParseException {
 585     // treat the text as if it was quoted, to drive phrase logic with old versions.
 586     return getFieldQuery(field, queryText, true);
 587   }
 588
 589   /**
 590    * @exception ParseException throw in overridden method to disallow
 591    */
 592   protected Query getFieldQuery(String field, String queryText, boolean quoted)  throws ParseException {
 593     // Use the analyzer to get all the tokens, and then build a TermQuery,
 594     // PhraseQuery, or nothing based on the term count
 595
 596     TokenStream source;
 597     try {
 598       source = analyzer.reusableTokenStream(field, new StringReader(queryText));
 599       source.reset();
 600     } catch (IOException e) {
 601       source = analyzer.tokenStream(field, new StringReader(queryText));
 602     }
 603     CachingTokenFilter buffer = new CachingTokenFilter(source);
 604     CharTermAttribute termAtt = null;
 605     PositionIncrementAttribute posIncrAtt = null;
 606     int numTokens = 0;
 607
 608     boolean success = false;
 609     try {
 610       buffer.reset();
 611       success = true;
 612     } catch (IOException e) {
 613       // success==false if we hit an exception
 614     }
 615     if (success) {
 616       if (buffer.hasAttribute(CharTermAttribute.class)) {
 617         termAtt = buffer.getAttribute(CharTermAttribute.class);
 618       }
 619       if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
 620         posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
 621       }
 622     }
 623
 624     int positionCount = 0;
 625     boolean severalTokensAtSamePosition = false;
 626
 627     boolean hasMoreTokens = false;
 628     if (termAtt != null) {
 629       try {
 630         hasMoreTokens = buffer.incrementToken();
 631         while (hasMoreTokens) {
 632           numTokens++;
 633           int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
 634           if (positionIncrement != 0) {
 635             positionCount += positionIncrement;
 636           } else {
 637             severalTokensAtSamePosition = true;
 638           }
 639           hasMoreTokens = buffer.incrementToken();
 640         }
 641       } catch (IOException e) {
 642         // ignore
 643       }
 644     }
 645     try {
 646       // rewind the buffer stream
 647       buffer.reset();
 648
 649       // close original stream - all tokens buffered
 650       source.close();
 651     }
 652     catch (IOException e) {
 653       // ignore
 654     }
 655
 656     if (numTokens == 0)
 657       return null;
 658     else if (numTokens == 1) {
 659       String term = null;
 660       try {
 661         boolean hasNext = buffer.incrementToken();
 662         assert hasNext == true;
 663         term = termAtt.toString();
 664       } catch (IOException e) {
 665         // safe to ignore, because we know the number of tokens
 666       }
 667       return newTermQuery(new Term(field, term));
 668     } else {
 669       if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
 670         if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
 671           // no phrase query:
 672           BooleanQuery q = newBooleanQuery(positionCount == 1);
 673
 674           BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
 675             BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
 676
 677           for (int i = 0; i < numTokens; i++) {
 678             String term = null;
 679             try {
 680               boolean hasNext = buffer.incrementToken();
 681               assert hasNext == true;
 682               term = termAtt.toString();
 683             } catch (IOException e) {
 684               // safe to ignore, because we know the number of tokens
 685             }
 686
 687             Query currentQuery = newTermQuery(
 688                 new Term(field, term));
 689             q.add(currentQuery, occur);
 690           }
 691           return q;
 692         }
 693         else {
 694           // phrase query:
 695           MultiPhraseQuery mpq = newMultiPhraseQuery();
 696           mpq.setSlop(phraseSlop);
 697           List<Term> multiTerms = new ArrayList<Term>();
 698           int position = -1;
 699           for (int i = 0; i < numTokens; i++) {
 700             String term = null;
 701             int positionIncrement = 1;
 702             try {
 703               boolean hasNext = buffer.incrementToken();
 704               assert hasNext == true;
 705               term = termAtt.toString();
 706               if (posIncrAtt != null) {
 707                 positionIncrement = posIncrAtt.getPositionIncrement();
 708               }
 709             } catch (IOException e) {
 710               // safe to ignore, because we know the number of tokens
 711             }
 712
 713             if (positionIncrement > 0 && multiTerms.size() > 0) {
 714               if (enablePositionIncrements) {
 715                 mpq.add(multiTerms.toArray(new Term[0]),position);
 716               } else {
 717                 mpq.add(multiTerms.toArray(new Term[0]));
 718               }
 719               multiTerms.clear();
 720             }
 721             position += positionIncrement;
 722             multiTerms.add(new Term(field, term));
 723           }
 724           if (enablePositionIncrements) {
 725             mpq.add(multiTerms.toArray(new Term[0]),position);
 726           } else {
 727             mpq.add(multiTerms.toArray(new Term[0]));
 728           }
 729           return mpq;
 730         }
 731       }
 732       else {
 733         PhraseQuery pq = newPhraseQuery();
 734         pq.setSlop(phraseSlop);
 735         int position = -1;
 736
 737
 738         for (int i = 0; i < numTokens; i++) {
 739           String term = null;
 740           int positionIncrement = 1;
 741
 742           try {
 743             boolean hasNext = buffer.incrementToken();
 744             assert hasNext == true;
 745             term = termAtt.toString();
 746             if (posIncrAtt != null) {
 747               positionIncrement = posIncrAtt.getPositionIncrement();
 748             }
 749           } catch (IOException e) {
 750             // safe to ignore, because we know the number of tokens
 751           }
 752
 753           if (enablePositionIncrements) {
 754             position += positionIncrement;
 755             pq.add(new Term(field, term),position);
 756           } else {
 757             pq.add(new Term(field, term));
 758           }
 759         }
 760         return pq;
 761       }
 762     }
 763   }
 764
 765
 766
 767   /**
 768    * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}.
 769    * This method may be overridden, for example, to return
 770    * a SpanNearQuery instead of a PhraseQuery.
 771    *
 772    * @exception ParseException throw in overridden method to disallow
 773    */
 774   protected Query getFieldQuery(String field, String queryText, int slop)
 775         throws ParseException {
 776     Query query = hasNewAPI ? getFieldQuery(field, queryText, true) : getFieldQuery(field, queryText);
 777
 778     if (query instanceof PhraseQuery) {
 779       ((PhraseQuery) query).setSlop(slop);
 780     }
 781     if (query instanceof MultiPhraseQuery) {
 782       ((MultiPhraseQuery) query).setSlop(slop);
 783     }
 784
 785     return query;
 786   }
 787
 788
 789   /**
 790    * @exception ParseException throw in overridden method to disallow
 791    */
 792   protected Query getRangeQuery(String field,
 793                                 String part1,
 794                                 String part2,
 795                                 boolean inclusive) throws ParseException
 796   {
 797     if (lowercaseExpandedTerms) {
 798       part1 = part1.toLowerCase();
 799       part2 = part2.toLowerCase();
 800     }
 801     try {
 802       DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
 803       df.setLenient(true);
 804       Date d1 = df.parse(part1);
 805       Date d2 = df.parse(part2);
 806       if (inclusive) {
 807         // The user can only specify the date, not the time, so make sure
 808         // the time is set to the latest possible time of that date to really
 809         // include all documents:
 810         Calendar cal = Calendar.getInstance(locale);
 811         cal.setTime(d2);
 812         cal.set(Calendar.HOUR_OF_DAY, 23);
 813         cal.set(Calendar.MINUTE, 59);
 814         cal.set(Calendar.SECOND, 59);
 815         cal.set(Calendar.MILLISECOND, 999);
 816         d2 = cal.getTime();
 817       }
 818       DateTools.Resolution resolution = getDateResolution(field);
 819       if (resolution == null) {
 820         // no default or field specific date resolution has been set,
 821         // use deprecated DateField to maintain compatibility with
 822         // pre-1.9 Lucene versions.
 823         part1 = DateField.dateToString(d1);
 824         part2 = DateField.dateToString(d2);
 825       } else {
 826         part1 = DateTools.dateToString(d1, resolution);
 827         part2 = DateTools.dateToString(d2, resolution);
 828       }
 829     }
 830     catch (Exception e) { }
 831
 832     return newRangeQuery(field, part1, part2, inclusive);
 833   }
 834
 835  /**
 836   * Builds a new BooleanQuery instance
 837   * @param disableCoord disable coord
 838   * @return new BooleanQuery instance
 839   */
 840   protected BooleanQuery newBooleanQuery(boolean disableCoord) {
 841     return new BooleanQuery(disableCoord);
 842   }
 843
 844  /**
 845   * Builds a new BooleanClause instance
 846   * @param q sub query
 847   * @param occur how this clause should occur when matching documents
 848   * @return new BooleanClause instance
 849   */
 850   protected BooleanClause newBooleanClause(Query q, BooleanClause.Occur occur) {
 851     return new BooleanClause(q, occur);
 852   }
 853
 854   /**
 855    * Builds a new TermQuery instance
 856    * @param term term
 857    * @return new TermQuery instance
 858    */
 859   protected Query newTermQuery(Term term){
 860     return new TermQuery(term);
 861   }
 862
 863   /**
 864    * Builds a new PhraseQuery instance
 865    * @return new PhraseQuery instance
 866    */
 867   protected PhraseQuery newPhraseQuery(){
 868     return new PhraseQuery();
 869   }
 870
 871   /**
 872    * Builds a new MultiPhraseQuery instance
 873    * @return new MultiPhraseQuery instance
 874    */
 875   protected MultiPhraseQuery newMultiPhraseQuery(){
 876     return new MultiPhraseQuery();
 877   }
 878
 879   /**
 880    * Builds a new PrefixQuery instance
 881    * @param prefix Prefix term
 882    * @return new PrefixQuery instance
 883    */
 884   protected Query newPrefixQuery(Term prefix){
 885     PrefixQuery query = new PrefixQuery(prefix);
 886     query.setRewriteMethod(multiTermRewriteMethod);
 887     return query;
 888   }
 889
 890   /**
 891    * Builds a new FuzzyQuery instance
 892    * @param term Term
 893    * @param minimumSimilarity minimum similarity
 894    * @param prefixLength prefix length
 895    * @return new FuzzyQuery Instance
 896    */
 897   protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
 898     // FuzzyQuery doesn't yet allow constant score rewrite
 899     return new FuzzyQuery(term,minimumSimilarity,prefixLength);
 900   }
 901
 902   /**
 903    * Builds a new TermRangeQuery instance
 904    * @param field Field
 905    * @param part1 min
 906    * @param part2 max
 907    * @param inclusive true if range is inclusive
 908    * @return new TermRangeQuery instance
 909    */
 910   protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
 911     final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
 912     query.setRewriteMethod(multiTermRewriteMethod);
 913     return query;
 914   }
 915
 916   /**
 917    * Builds a new MatchAllDocsQuery instance
 918    * @return new MatchAllDocsQuery instance
 919    */
 920   protected Query newMatchAllDocsQuery() {
 921     return new MatchAllDocsQuery();
 922   }
 923
 924   /**
 925    * Builds a new WildcardQuery instance
 926    * @param t wildcard term
 927    * @return new WildcardQuery instance
 928    */
 929   protected Query newWildcardQuery(Term t) {
 930     WildcardQuery query = new WildcardQuery(t);
 931     query.setRewriteMethod(multiTermRewriteMethod);
 932     return query;
 933   }
 934
 935   /**
 936    * Factory method for generating query, given a set of clauses.
 937    * By default creates a boolean query composed of clauses passed in.
 938    *
 939    * Can be overridden by extending classes, to modify query being
 940    * returned.
 941    *
 942    * @param clauses List that contains {@link BooleanClause} instances
 943    *    to join.
 944    *
 945    * @return Resulting {@link Query} object.
 946    * @exception ParseException throw in overridden method to disallow
 947    */
 948   protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException {
 949     return getBooleanQuery(clauses, false);
 950   }
 951
 952   /**
 953    * Factory method for generating query, given a set of clauses.
 954    * By default creates a boolean query composed of clauses passed in.
 955    *
 956    * Can be overridden by extending classes, to modify query being
 957    * returned.
 958    *
 959    * @param clauses List that contains {@link BooleanClause} instances
 960    *    to join.
 961    * @param disableCoord true if coord scoring should be disabled.
 962    *
 963    * @return Resulting {@link Query} object.
 964    * @exception ParseException throw in overridden method to disallow
 965    */
 966   protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
 967     throws ParseException
 968   {
 969     if (clauses.size()==0) {
 970       return null; // all clause words were filtered away by the analyzer.
 971     }
 972     BooleanQuery query = newBooleanQuery(disableCoord);
 973     for(final BooleanClause clause: clauses) {
 974       query.add(clause);
 975     }
 976     return query;
 977   }
 978
 979   /**
 980    * Factory method for generating a query. Called when parser
 981    * parses an input term token that contains one or more wildcard
 982    * characters (? and *), but is not a prefix term token (one
 983    * that has just a single * character at the end)
 984    *<p>
 985    * Depending on settings, prefix term may be lower-cased
 986    * automatically. It will not go through the default Analyzer,
 987    * however, since normal Analyzers are unlikely to work properly
 988    * with wildcard templates.
 989    *<p>
 990    * Can be overridden by extending classes, to provide custom handling for
 991    * wildcard queries, which may be necessary due to missing analyzer calls.
 992    *
 993    * @param field Name of the field query will use.
 994    * @param termStr Term token that contains one or more wild card
 995    *   characters (? or *), but is not simple prefix term
 996    *
 997    * @return Resulting {@link Query} built for the term
 998    * @exception ParseException throw in overridden method to disallow
 999    */
1000   protected Query getWildcardQuery(String field, String termStr) throws ParseException
1001   {
1002     if ("*".equals(field)) {
1003       if ("*".equals(termStr)) return newMatchAllDocsQuery();
1004     }
1005     if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?")))
1006       throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
1007     if (lowercaseExpandedTerms) {
1008       termStr = termStr.toLowerCase();
1009     }
1010     Term t = new Term(field, termStr);
1011     return newWildcardQuery(t);
1012   }
1013
1014   /**
1015    * Factory method for generating a query (similar to
1016    * {@link #getWildcardQuery}). Called when parser parses an input term
1017    * token that uses prefix notation; that is, contains a single '*' wildcard
1018    * character as its last character. Since this is a special case
1019    * of generic wildcard term, and such a query can be optimized easily,
1020    * this usually results in a different query object.
1021    *<p>
1022    * Depending on settings, a prefix term may be lower-cased
1023    * automatically. It will not go through the default Analyzer,
1024    * however, since normal Analyzers are unlikely to work properly
1025    * with wildcard templates.
1026    *<p>
1027    * Can be overridden by extending classes, to provide custom handling for
1028    * wild card queries, which may be necessary due to missing analyzer calls.
1029    *
1030    * @param field Name of the field query will use.
1031    * @param termStr Term token to use for building term for the query
1032    *    (<b>without</b> trailing '*' character!)
1033    *
1034    * @return Resulting {@link Query} built for the term
1035    * @exception ParseException throw in overridden method to disallow
1036    */
1037   protected Query getPrefixQuery(String field, String termStr) throws ParseException
1038   {
1039     if (!allowLeadingWildcard && termStr.startsWith("*"))
1040       throw new ParseException("'*' not allowed as first character in PrefixQuery");
1041     if (lowercaseExpandedTerms) {
1042       termStr = termStr.toLowerCase();
1043     }
1044     Term t = new Term(field, termStr);
1045     return newPrefixQuery(t);
1046   }
1047
1048    /**
1049    * Factory method for generating a query (similar to
1050    * {@link #getWildcardQuery}). Called when parser parses
1051    * an input term token that has the fuzzy suffix (~) appended.
1052    *
1053    * @param field Name of the field query will use.
1054    * @param termStr Term token to use for building term for the query
1055    *
1056    * @return Resulting {@link Query} built for the term
1057    * @exception ParseException throw in overridden method to disallow
1058    */
1059   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
1060   {
1061     if (lowercaseExpandedTerms) {
1062       termStr = termStr.toLowerCase();
1063     }
1064     Term t = new Term(field, termStr);
1065     return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
1066   }
1067
1068   /**
1069    * Returns a String where the escape char has been
1070    * removed, or kept only once if there was a double escape.
1071    *
1072    * Supports escaped unicode characters, e. g. translates
1073    * <code>\\u0041</code> to <code>A</code>.
1074    *
1075    */
1076   private String discardEscapeChar(String input) throws ParseException {
1077     // Create char array to hold unescaped char sequence
1078     char[] output = new char[input.length()];
1079
1080     // The length of the output can be less than the input
1081     // due to discarded escape chars. This variable holds
1082     // the actual length of the output
1083     int length = 0;
1084
1085     // We remember whether the last processed character was
1086     // an escape character
1087     boolean lastCharWasEscapeChar = false;
1088
1089     // The multiplier the current unicode digit must be multiplied with.
1090     // E. g. the first digit must be multiplied with 16^3, the second with 16^2...
1091     int codePointMultiplier = 0;
1092
1093     // Used to calculate the codepoint of the escaped unicode character
1094     int codePoint = 0;
1095
1096     for (int i = 0; i < input.length(); i++) {
1097       char curChar = input.charAt(i);
1098       if (codePointMultiplier > 0) {
1099         codePoint += hexToInt(curChar) * codePointMultiplier;
1100         codePointMultiplier >>>= 4;
1101         if (codePointMultiplier == 0) {
1102           output[length++] = (char)codePoint;
1103           codePoint = 0;
1104         }
1105       } else if (lastCharWasEscapeChar) {
1106         if (curChar == 'u') {
1107           // found an escaped unicode character
1108           codePointMultiplier = 16 * 16 * 16;
1109         } else {
1110           // this character was escaped
1111           output[length] = curChar;
1112           length++;
1113         }
1114         lastCharWasEscapeChar = false;
1115       } else {
1116         if (curChar == '\\') {
1117           lastCharWasEscapeChar = true;
1118         } else {
1119           output[length] = curChar;
1120           length++;
1121         }
1122       }
1123     }
1124
1125     if (codePointMultiplier > 0) {
1126       throw new ParseException("Truncated unicode escape sequence.");
1127     }
1128
1129     if (lastCharWasEscapeChar) {
1130       throw new ParseException("Term can not end with escape character.");
1131     }
1132
1133     return new String(output, 0, length);
1134   }
1135
1136   /** Returns the numeric value of the hexadecimal character */
1137   private static final int hexToInt(char c) throws ParseException {
1138     if ('0' <= c && c <= '9') {
1139       return c - '0';
1140     } else if ('a' <= c && c <= 'f'){
1141       return c - 'a' + 10;
1142     } else if ('A' <= c && c <= 'F') {
1143       return c - 'A' + 10;
1144     } else {
1145       throw new ParseException("None-hex character in unicode escape sequence: " + c);
1146     }
1147   }
1148
1149   /**
1150    * Returns a String where those characters that QueryParser
1151    * expects to be escaped are escaped by a preceding <code>\</code>.
1152    */
1153   public static String escape(String s) {
1154     StringBuilder sb = new StringBuilder();
1155     for (int i = 0; i < s.length(); i++) {
1156       char c = s.charAt(i);
1157       // These characters are part of the query syntax and must be escaped
1158       if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
1159         || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
1160         || c == '*' || c == '?' || c == '|' || c == '&') {
1161         sb.append('\\');
1162       }
1163       sb.append(c);
1164     }
1165     return sb.toString();
1166   }
1167
1168   /**
1169    * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
1170    * Usage:<br>
1171    * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
1172    */
1173   public static void main(String[] args) throws Exception {
1174     if (args.length == 0) {
1175       System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
1176       System.exit(0);
1177     }
1178     QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
1179                            new org.apache.lucene.analysis.SimpleAnalyzer());
1180     Query q = qp.parse(args[0]);
1181     System.out.println(q.toString("field"));
1182   }
1183 }
1184
1185 PARSER_END(QueryParser)
1186
1187 /* ***************** */
1188 /* Token Definitions */
1189 /* ***************** */
1190
// Private ('#'-prefixed) regular expressions, declared for every lexical
// state ("<*>"). They are building blocks for the token definitions and are
// never produced as tokens themselves.
<*> TOKEN : {
  // a single decimal digit
  <#_NUM_CHAR:   ["0"-"9"] >
// every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] >
  // first character of a term: anything but whitespace and the listed
  // syntax characters, or any escaped character
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
                       | <_ESCAPED_CHAR> ) >
  // later characters of a term may additionally be an unescaped '-' or '+'
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
  // space, tab, line feed, carriage return and U+3000
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
  // content of a quoted string: anything but an unescaped quote or
  // backslash, or any escaped character
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}
1202
// Whitespace is discarded in the DEFAULT state and in both range states
// (the Boost state is not listed here).
<DEFAULT, RangeIn, RangeEx> SKIP : {
  < <_WHITESPACE>>
}
1206
// Tokens of the DEFAULT lexical state: operators, grouping, terms and the
// tokens that switch the lexer into the Boost, RangeIn or RangeEx state.
<DEFAULT> TOKEN : {
  // boolean operators, each in a keyword and a symbolic spelling
  <AND:       ("AND" | "&&") >
| <OR:        ("OR" | "||") >
| <NOT:       ("NOT" | "!") >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    "(" >
| <RPAREN:    ")" >
| <COLON:     ":" >
| <STAR:      "*" >
  // '^' switches to the Boost state, in which the boost NUMBER is read
| <CARAT:     "^" > : Boost
  // double-quoted text, possibly empty
| <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
  // '~' optionally followed by a decimal number
| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
  // a lone '*', or a term followed by a single trailing '*'
| <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
  // term characters mixed with '*' and '?' at any position
| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
  // '[' opens an inclusive range, '{' an exclusive one
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
1226
// Boost state (entered after CARAT): the only token is a decimal number
// with an optional fraction; reading it returns the lexer to DEFAULT.
<Boost> TOKEN : {
<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
1230
// RangeIn state (entered after RANGEIN_START): tokens of an inclusive range.
<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
  // ']' closes the range and returns the lexer to DEFAULT
| <RANGEIN_END: "]"> : DEFAULT
  // non-empty double-quoted bound
| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
  // unquoted bound: a run of characters other than space and ']'
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}
1237
// RangeEx state (entered after RANGEEX_START): tokens of an exclusive range.
<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
  // '}' closes the range and returns the lexer to DEFAULT
| <RANGEEX_END: "}"> : DEFAULT
  // non-empty double-quoted bound
| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
  // unquoted bound: a run of characters other than space and '}'
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}
1244
1245 // *   Query  ::= ( Clause )*
1246 // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
1247
// Parses an optional AND / OR operator between two clauses.
// Returns CONJ_AND or CONJ_OR for the operator found, CONJ_NONE if there
// is none.
int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    <AND> { ret = CONJ_AND; }
    | <OR>  { ret = CONJ_OR; }
  ]
  { return ret; }
}
1258
// Parses an optional clause modifier.
// Returns MOD_REQ for '+', MOD_NOT for '-' as well as for the NOT token,
// and MOD_NONE if no modifier is present.
int Modifiers() : {
  int ret = MOD_NONE;
}
{
  [
     <PLUS> { ret = MOD_REQ; }
     | <MINUS> { ret = MOD_NOT; }
     | <NOT> { ret = MOD_NOT; }
  ]
  { return ret; }
}
1270
// This makes sure that there is no garbage after the query string:
// a complete Query must be followed directly by the end of input.
// Returns the query produced by Query(field).
Query TopLevelQuery(String field) :
{
        Query q;
}
{
        q=Query(field) <EOF>
        {
                return q;
        }
}
1282
// Parses one clause followed by any number of further clauses, each
// optionally preceded by a conjunction and a modifier, and hands them to
// addClause in order.
// A query that ends up as exactly one clause whose first clause had no
// modifier is returned as is; everything else is combined through
// getBooleanQuery(clauses).
Query Query(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query q, firstQuery=null;
  int conj, mods;
}
{
  mods=Modifiers() q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, mods, q);
    // remember the first clause only if it is unmodified, so it can be
    // returned without a BooleanQuery wrapper below
    if (mods == MOD_NONE)
        firstQuery=q;
  }
  (
    conj=Conjunction() mods=Modifiers() q=Clause(field)
    { addClause(clauses, conj, mods, q); }
  )*
    {
      if (clauses.size() == 1 && firstQuery != null)
        return firstQuery;
      else {
  return getBooleanQuery(clauses);
      }
    }
}
1308
// Parses a single clause: an optional field prefix ("name:" or "*:")
// followed by either a Term or a parenthesised sub-query, which may carry
// a boost ("^number").
// The field prefix, when present, replaces the field passed in for the
// rest of this clause. Returns the resulting query, which may be null.
Query Clause(String field) : {
  Query q;
  Token fieldToken=null, boost=null;
}
{
  [
    // two tokens of lookahead are needed to tell a field prefix
    // (TERM or STAR followed by COLON) from a plain term
    LOOKAHEAD(2)
    (
    fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
    | <STAR> <COLON> {field="*";}
    )
  ]

  (
   q=Term(field)
   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?

  )
    {
      // avoid boosting null queries (getBooleanQuery returns null when no
      // clause is left); test for null explicitly instead of relying on a
      // swallowed NullPointerException
      if (boost != null && q != null) {
        try {
          q.setBoost(Float.valueOf(boost.image).floatValue());
        } catch (Exception ignored) {
          // an unusable boost value leaves the query's boost untouched
        }
      }
      return q;
    }
}
1338
1339
// Parses one term-level expression and builds the query for it through the
// getXxxQuery factory methods. Four forms are accepted:
//   - a single term (plain, wildcard or prefix), optionally with a fuzzy
//     suffix and/or a boost,
//   - an inclusive range  [a TO b],
//   - an exclusive range  {a TO b},
//   - a quoted phrase, optionally with a slop and/or a boost.
// The boost, when given, is applied at the end to any non-null query.
Query Term(String field) : {
  Token term, boost=null, fuzzySlop=null, goop1, goop2;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  Query q;
}
{
  (
     (
       term=<TERM>
       | term=<STAR> { wildcard=true; }
       | term=<PREFIXTERM> { prefix=true; }
       | term=<WILDTERM> { wildcard=true; }
       | term=<NUMBER>
     )
     // the fuzzy suffix may appear before or after the boost
     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
     {
       String termImage=discardEscapeChar(term.image);
       // wildcard and prefix take precedence over a fuzzy suffix
       if (wildcard) {
       q = getWildcardQuery(field, termImage);
       } else if (prefix) {
         // strip the trailing '*' before unescaping
         q = getPrefixQuery(field,
           discardEscapeChar(term.image.substring
          (0, term.image.length()-1)));
       } else if (fuzzy) {
          // a bare '~' has no number to parse: the default fuzzyMinSim stays
          float fms = fuzzyMinSim;
          try {
            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
          } catch (Exception ignored) { }
         if(fms < 0.0f || fms > 1.0f){
           throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
         }
         q = getFuzzyQuery(field, termImage,fms);
       } else {
         // hasNewAPI selects between the three- and the two-argument
         // getFieldQuery overload
         q = hasNewAPI ? getFieldQuery(field, termImage, false) : getFieldQuery(field, termImage);
       }
     }
     // inclusive range; the TO keyword between the bounds is optional
     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
         <RANGEIN_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
          // drop the surrounding quotes of quoted bounds
          if (goop1.kind == RANGEIN_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
          }
          if (goop2.kind == RANGEIN_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
          }
          q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), true);
        }
     // exclusive range; the TO keyword between the bounds is optional
     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
         <RANGEEX_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
          // drop the surrounding quotes of quoted bounds
          if (goop1.kind == RANGEEX_QUOTED) {
            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
          }
          if (goop2.kind == RANGEEX_QUOTED) {
            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
          }

          q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), false);
        }
     // quoted phrase; here the '~' suffix is read as the phrase slop
     | term=<QUOTED>
       [ fuzzySlop=<FUZZY_SLOP> ]
       [ <CARAT> boost=<NUMBER> ]
       {
         int s = phraseSlop;

         if (fuzzySlop != null) {
           // the number after '~' is converted with intValue(); if it
           // cannot be parsed the default phraseSlop stays in effect
           try {
             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
           }
           catch (Exception ignored) { }
         }
         // strip the surrounding quotes before unescaping
         q = getFieldQuery(field, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s);
       }
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      catch (Exception ignored) {
    /* Should this be handled somehow? (defaults to "no boost", if
     * boost number is invalid)
     */
      }

      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    return q;
  }
}