2 * Standard file is based on the TextParser.jj from lucene 2.3
7 JAVA_UNICODE_ESCAPE=true;
8 USER_CHAR_STREAM=false;
13 PARSER_BEGIN(StandardSyntaxParser)
14 package org.apache.lucene.queryParser.standard.parser;
17 * Licensed to the Apache Software Foundation (ASF) under one or more
18 * contributor license agreements. See the NOTICE file distributed with
19 * this work for additional information regarding copyright ownership.
20 * The ASF licenses this file to You under the Apache License, Version 2.0
21 * (the "License"); you may not use this file except in compliance with
22 * the License. You may obtain a copy of the License at
24 * http://www.apache.org/licenses/LICENSE-2.0
26 * Unless required by applicable law or agreed to in writing, software
27 * distributed under the License is distributed on an "AS IS" BASIS,
28 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29 * See the License for the specific language governing permissions and
30 * limitations under the License.
33 import java.io.StringReader;
34 import java.util.ArrayList;
35 import java.util.List;
36 import java.util.Vector;
38 import org.apache.lucene.messages.Message;
39 import org.apache.lucene.messages.MessageImpl;
40 import org.apache.lucene.queryParser.core.QueryNodeError;
41 import org.apache.lucene.queryParser.core.QueryNodeException;
42 import org.apache.lucene.queryParser.core.QueryNodeParseException;
43 import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
44 import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
45 import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
46 import org.apache.lucene.queryParser.core.nodes.BoostQueryNode;
47 import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
48 import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
49 import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
50 import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
51 import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
52 import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
53 import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
54 import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
55 import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
56 import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
57 import org.apache.lucene.queryParser.core.nodes.QueryNode;
58 import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
59 import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
60 import org.apache.lucene.queryParser.core.parser.SyntaxParser;
62 public class StandardSyntaxParser implements SyntaxParser {
// Codes assigned by Conjunction() to report which operator token was read.
64 private static final int CONJ_NONE =0;
// Must differ from CONJ_OR: both constants used to be 2, which made an AND
// indistinguishable from an OR for any code comparing the returned value.
65 private static final int CONJ_AND =1;
66 private static final int CONJ_OR =2;
69 // syntax parser constructor
// Builds the parser over an empty StringReader; parse() calls ReInit with the
// actual query text before it invokes TopLevelQuery.
70 public StandardSyntaxParser() {
71 this(new StringReader(""));
73 /** Parses a query string, returning a {@link org.apache.lucene.queryParser.core.nodes.QueryNode}.
74 * @param query the query string to be parsed.
 * @param field the field handed to TopLevelQuery; a clause that carries its own "field:" prefix replaces it for that clause.
75 * @throws QueryNodeParseException if the parsing fails
77 public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
// Re-point the parser at the text of this query.
78 ReInit(new StringReader(query.toString()));
80 // TopLevelQuery is a Query followed by the end-of-input (EOF)
81 QueryNode querynode = TopLevelQuery(field);
84 catch (ParseException tme) {
// Translate the grammar-level ParseException: it is passed to the
// QueryNodeParseException constructor, and a message built from the query
// text and the parser's own message is set on the new exception.
89 Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage());
90 QueryNodeParseException e = new QueryNodeParseException(tme);
92 e.setNonLocalizedMessage(message);
99 PARSER_END(StandardSyntaxParser)
101 /* ***************** */
102 /* Token Definitions */
103 /* ***************** */
// Helper expressions marked with '#': JavaCC treats them as private, so they
// can only be referenced from other token definitions and never match alone.
106 <#_NUM_CHAR: ["0"-"9"] >
107 // every character that follows a backslash is considered as an escaped character
108 | <#_ESCAPED_CHAR: "\\" ~[] >
// A term starts with any character outside the excluded set below
// (whitespace and query-syntax characters), or with an escaped character.
109 | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
110 "[", "]", "\"", "{", "}", "~", "\\" ]
111 | <_ESCAPED_CHAR> ) >
// After the first character a term may also contain "-" and "+".
112 | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
113 | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
// Inside quotes: anything except a bare quote or backslash, or an escaped character.
114 | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
117 <DEFAULT, RangeIn, RangeEx> SKIP : {
122 <AND: ("AND" | "&&") >
123 | <OR: ("OR" | "||") >
124 | <NOT: ("NOT" | "!") >
130 | <CARAT: "^" > : Boost
131 | <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
132 | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
133 | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
134 | <RANGEIN_START: "[" > : RangeIn
135 | <RANGEEX_START: "{" > : RangeEx
139 <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
144 | <RANGEIN_END: "]"> : DEFAULT
145 | <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
146 | <RANGEIN_GOOP: (~[ " ", "]" ])+ >
151 | <RANGEEX_END: "}"> : DEFAULT
152 | <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
153 | <RANGEEX_GOOP: (~[ " ", "}" ])+ >
156 // * Query ::= ( Clause )*
157 // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
// Matches an <AND> or <OR> token and stores the corresponding CONJ_* code in ret.
159 int Conjunction() : {
164 <AND> { ret = CONJ_AND; }
165 | <OR> { ret = CONJ_OR; }
// Translates a modifier token into a ModifierQueryNode.Modifier value.
// ret starts as MOD_NONE; <PLUS> selects MOD_REQ, while <MINUS> and <NOT>
// both select MOD_NOT.
170 ModifierQueryNode.Modifier Modifiers() : {
171 ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE;
175 <PLUS> { ret = ModifierQueryNode.Modifier.MOD_REQ; }
176 | <MINUS> { ret = ModifierQueryNode.Modifier.MOD_NOT; }
177 | <NOT> { ret = ModifierQueryNode.Modifier.MOD_NOT; }
182 // This makes sure that there is no garbage after the query string
183 QueryNode TopLevelQuery(CharSequence field) :
194 // These changes were made to introduce operator precedence:
195 // - Clause() now returns a QueryNode.
196 // - The modifiers are consumed by Clause() and returned as part of the QueryNode Object
197 // - Query does not consume conjunctions (AND, OR) anymore.
198 // - This is now done by two new non-terminals: ConjQuery and DisjQuery
199 // The parse tree looks similar to this:
200 // Query ::= DisjQuery ( DisjQuery )*
201 // DisjQuery ::= ConjQuery ( OR ConjQuery )*
202 // ConjQuery ::= ModClause ( AND ModClause )*
203 // ModClause ::= [ Modifier ] Clause
// Parses DisjQuery productions for the given field. The clause list is
// created on demand and seeded with the first sub-query; once it exists the
// collected sub-queries are returned wrapped in a BooleanQueryNode.
206 QueryNode Query(CharSequence field) :
208 Vector<QueryNode> clauses = null;
209 QueryNode c, first=null;
212 first=DisjQuery(field)
// Allocate the list lazily so that "first" is recorded ahead of "c".
216 if (clauses == null) {
217 clauses = new Vector<QueryNode>();
218 clauses.addElement(first);
220 clauses.addElement(c);
// A non-null list means further sub-queries were collected.
224 if (clauses != null) {
225 return new BooleanQueryNode(clauses);
// Parses a ConjQuery followed by <OR>-separated ConjQuery productions.
// The operands joined by <OR> are gathered, starting with the first one, and
// returned as an OrQueryNode.
232 QueryNode DisjQuery(CharSequence field) : {
234 Vector<QueryNode> clauses = null;
237 first = ConjQuery(field)
239 <OR> c=ConjQuery(field)
// Allocate the list lazily so that "first" is recorded ahead of "c".
241 if (clauses == null) {
242 clauses = new Vector<QueryNode>();
243 clauses.addElement(first);
245 clauses.addElement(c);
// A non-null list means at least one <OR> operand was read.
249 if (clauses != null) {
250 return new OrQueryNode(clauses);
// Parses a ModClause followed by <AND>-separated ModClause productions.
// The operands joined by <AND> are gathered, starting with the first one, and
// returned as an AndQueryNode.
257 QueryNode ConjQuery(CharSequence field) : {
259 Vector<QueryNode> clauses = null;
262 first = ModClause(field)
264 <AND> c=ModClause(field)
// Allocate the list lazily so that "first" is recorded ahead of "c".
266 if (clauses == null) {
267 clauses = new Vector<QueryNode>();
268 clauses.addElement(first);
270 clauses.addElement(c);
// A non-null list means at least one <AND> operand was read.
274 if (clauses != null) {
275 return new AndQueryNode(clauses);
282 // QueryNode Query(CharSequence field) :
284 // List clauses = new ArrayList();
285 // List modifiers = new ArrayList();
286 // QueryNode q, firstQuery=null;
287 // ModifierQueryNode.Modifier mods;
291 // mods=Modifiers() q=Clause(field)
293 // if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q;
295 // // do not create modifier nodes with MOD_NONE
296 // if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
297 // q = new ModifierQueryNode(q, mods);
302 // conj=Conjunction() mods=Modifiers() q=Clause(field)
304 // // do not create modifier nodes with MOD_NONE
305 // if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
306 // q = new ModifierQueryNode(q, mods);
309 // //TODO: figure out what to do with AND and ORs
313 // if (clauses.size() == 1 && firstQuery != null)
314 // return firstQuery;
316 // return new BooleanQueryNode(clauses);
// Parses Modifiers() followed by a Clause. The clause is wrapped in a
// ModifierQueryNode only when the modifier is something other than MOD_NONE.
321 QueryNode ModClause(CharSequence field) : {
323 ModifierQueryNode.Modifier mods;
326 mods=Modifiers() q= Clause(field) {
327 if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
328 q = new ModifierQueryNode(q, mods);
// Parses a single clause. A "<TERM> <COLON>" prefix replaces the incoming
// field with the unescaped term text. The parenthesised alternative parses a
// nested Query, accepts a "^<NUMBER>" boost and sets the group flag, which
// causes the result to be wrapped in a GroupQueryNode at the end.
// NOTE(review): the other alternative of the choice is not visible in this
// listing; presumably it delegates to Term(field) - confirm against the full file.
334 QueryNode Clause(CharSequence field) : {
336 Token fieldToken=null, boost=null;
337 boolean group = false;
// Explicit field name: overrides the field passed in by the caller.
343 fieldToken=<TERM> <COLON> {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);}
349 | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? {group=true;}
// Boost handling: the boost text is converted to a float and the node is
// wrapped in a BoostQueryNode; a conversion failure is caught and ignored.
354 float f = (float)1.0;
356 f = Float.valueOf(boost.image).floatValue();
357 // avoid boosting null queries, such as those caused by stop words
359 q = new BoostQueryNode(q, f);
361 } catch (Exception ignored) {
362 /* Should this be handled somehow? (defaults to "no boost", if
363 * boost number is invalid)
367 if (group) { q = new GroupQueryNode(q);}
373 QueryNode Term(CharSequence field) : {
374 Token term, boost=null, fuzzySlop=null, goop1, goop2;
375 boolean fuzzy = false;
377 ParametricQueryNode qLower, qUpper;
378 float defaultMinSimilarity = 0.5f;
383 term=<TERM> { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
386 [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
387 [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
390 float fms = defaultMinSimilarity;
392 fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
393 } catch (Exception ignored) { }
394 if(fms < 0.0f || fms > 1.0f){
395 throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
397 q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
400 | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
401 [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
403 [ <CARAT> boost=<NUMBER> ]
405 if (goop1.kind == RANGEIN_QUOTED) {
406 goop1.image = goop1.image.substring(1, goop1.image.length()-1);
408 if (goop2.kind == RANGEIN_QUOTED) {
409 goop2.image = goop2.image.substring(1, goop2.image.length()-1);
412 qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GE,
413 EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
414 qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LE,
415 EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
416 q = new ParametricRangeQueryNode(qLower, qUpper);
418 | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
419 [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
421 [ <CARAT> boost=<NUMBER> ]
423 if (goop1.kind == RANGEEX_QUOTED) {
424 goop1.image = goop1.image.substring(1, goop1.image.length()-1);
426 if (goop2.kind == RANGEEX_QUOTED) {
427 goop2.image = goop2.image.substring(1, goop2.image.length()-1);
429 qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT,
430 EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
431 qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT,
432 EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
433 q = new ParametricRangeQueryNode(qLower, qUpper);
435 | term=<QUOTED> {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);}
436 [ fuzzySlop=<FUZZY_SLOP> ]
437 [ <CARAT> boost=<NUMBER> ]
441 if (fuzzySlop != null) {
443 phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
444 q = new SlopQueryNode(q, phraseSlop);
446 catch (Exception ignored) {
447 /* Should this be handled somehow? (defaults to "no PhraseSlop", if
448 * slop number is invalid)
457 float f = (float)1.0;
459 f = Float.valueOf(boost.image).floatValue();
460 // avoid boosting null queries, such as those caused by stop words
462 q = new BoostQueryNode(q, f);
464 } catch (Exception ignored) {
465 /* Should this be handled somehow? (defaults to "no boost", if
466 * boost number is invalid)