/**
 * Standard file is based on the TextParser.jj from lucene 2.3
 */

options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=false;
  IGNORE_CASE=false;
  JDK_VERSION="1.5";
}

PARSER_BEGIN(StandardSyntaxParser)
package org.apache.lucene.queryParser.standard.parser;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import org.apache.lucene.messages.Message;
import org.apache.lucene.messages.MessageImpl;
import org.apache.lucene.queryParser.core.QueryNodeError;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.QueryNodeParseException;
import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryParser.core.nodes.BoostQueryNode;
import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryParser.core.parser.SyntaxParser;

/**
 * Syntax parser generated from this grammar. {@link #parse(CharSequence, CharSequence)}
 * is the entry point; it re-initialises the generated parser on the query text and
 * runs the TopLevelQuery production.
 */
public class StandardSyntaxParser implements SyntaxParser {

  // Values returned by Conjunction(). They must be pairwise distinct, otherwise a
  // caller cannot tell which conjunction was matched (AND and OR previously shared 2).
  private static final int CONJ_NONE = 0;
  private static final int CONJ_AND  = 1;
  private static final int CONJ_OR   = 2;

  // syntax parser constructor
  public StandardSyntaxParser() {
    // Start on an empty reader; parse() calls ReInit with the real query text.
    this(new StringReader(""));
  }

  /**
   * Parses a query string, returning a {@link org.apache.lucene.queryParser.core.nodes.QueryNode}.
   *
   * @param query the query string to be parsed.
   * @param field the field handed to the TopLevelQuery production; a clause that
   *              carries its own field token replaces it for that clause.
   * @return the node produced by the TopLevelQuery production.
   * @throws QueryNodeParseException if the parsing fails. A ParseException raised by
   *         the generated parser is rethrown after the query text is attached to it;
   *         an Error raised while parsing is wrapped in a new QueryNodeParseException
   *         carrying an INVALID_SYNTAX_CANNOT_PARSE message.
   */
  public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
    ReInit(new StringReader(query.toString()));
    try {
      // TopLevelQuery is a Query followed by the end-of-input (EOF)
      QueryNode querynode = TopLevelQuery(field);
      return querynode;
    }
    catch (ParseException tme) {
      // Attach the offending query so the caller can report it.
      tme.setQuery(query);
      throw tme;
    }
    catch (Error tme) {
      // Errors raised during parsing are converted into the checked parse
      // exception, keeping the original Error as the cause.
      Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage());
      QueryNodeParseException e = new QueryNodeParseException(tme);
      e.setQuery(query);
      e.setNonLocalizedMessage(message);
      throw e;
    }
  }

}

PARSER_END(StandardSyntaxParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

// Private (#) helper expressions, available in every lexical state (<*>).
<*> TOKEN : {
  <#_NUM_CHAR: ["0"-"9"] >
// every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", "[", "]", "\"", "{", "}", "~", "\\" ] | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}

SKIP : {
  < <_WHITESPACE>>
}

// NOTE(review): the four TOKEN blocks below look damaged in this copy. The
// alternatives have no token names and several regular expressions start in the
// middle; it looks as if every "<Name ...>" span was stripped. The actions further
// down compare against RANGEIN_QUOTED and RANGEEX_QUOTED, and the blocks switch to
// the Boost, RangeIn, RangeEx and DEFAULT states, so named tokens and probably
// lexical-state prefixes are expected here. They are left exactly as found;
// restore them from the canonical grammar before running JavaCC.
TOKEN : { | | | | | | | | : Boost | )* "\""> | (<_TERM_CHAR>)* > | )+ ( "." (<_NUM_CHAR>)+ )? )? > | : RangeIn | : RangeEx }

TOKEN : { )+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT }

TOKEN : { | : DEFAULT | | }

TOKEN : { | : DEFAULT | | }

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" )

// Returns CONJ_AND or CONJ_OR depending on which alternative matches, or CONJ_NONE
// when neither does.
// NOTE(review): the token that selects each alternative is missing in this copy.
int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    { ret = CONJ_AND; }
    | { ret = CONJ_OR; }
  ]
  { return ret; }
}

// Returns the modifier for the matched alternative: MOD_REQ for the first, MOD_NOT
// for the second and third, MOD_NONE when none matches.
// NOTE(review): the token that selects each alternative is missing in this copy.
ModifierQueryNode.Modifier Modifiers() : {
  ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE;
}
{
  [
    { ret = ModifierQueryNode.Modifier.MOD_REQ; }
    | { ret = ModifierQueryNode.Modifier.MOD_NOT; }
    | { ret = ModifierQueryNode.Modifier.MOD_NOT; }
  ]
  { return ret; }
}

// This makes sure that there is no garbage after the query string
// NOTE(review): no end-of-input token follows Query(field) in this copy, although
// the comment in parse() says TopLevelQuery is a Query followed by EOF; confirm.
QueryNode TopLevelQuery(CharSequence field) : {
  QueryNode q;
}
{
  q=Query(field)
  { return q; }
}

// These changes were made to introduce operator precedence:
// - Clause() now returns a QueryNode.
// - The modifiers are consumed by ModClause() and returned as part of the QueryNode Object
// - Query does not consume conjunctions (AND, OR) anymore.
// - This is now done by two new non-terminals: ConjQuery and DisjQuery
// The parse tree looks similar to this:
//       Query ::= DisjQuery ( DisjQuery )*
//   DisjQuery ::= ConjQuery ( OR ConjQuery )*
//   ConjQuery ::= Clause ( AND Clause )*
//      Clause ::= [ Modifier ] ...
/*
 * Query: parses one DisjQuery, then zero or more further DisjQuery productions.
 * When only one was parsed that node is returned unchanged; otherwise all parsed
 * nodes are returned inside a BooleanQueryNode. The clause list is created lazily,
 * on the second clause, and seeded with the first one.
 */
QueryNode Query(CharSequence field) : { Vector clauses = null; QueryNode c, first=null; } { first=DisjQuery(field) ( c=DisjQuery(field) { if (clauses == null) { clauses = new Vector(); clauses.addElement(first); } clauses.addElement(c); } )* { if (clauses != null) { return new BooleanQueryNode(clauses); } else { return first; } } }
/*
 * DisjQuery: same shape as Query, but repeats ConjQuery and wraps multiple results
 * in an OrQueryNode.
 * NOTE(review): no separator token appears before the repeated ConjQuery in this
 * copy; the grammar sketch in this file's comments shows OR there. Confirm against
 * the canonical grammar.
 */
QueryNode DisjQuery(CharSequence field) : { QueryNode first, c; Vector clauses = null; } { first = ConjQuery(field) ( c=ConjQuery(field) { if (clauses == null) { clauses = new Vector(); clauses.addElement(first); } clauses.addElement(c); } )* { if (clauses != null) { return new OrQueryNode(clauses); } else { return first; } } }
/*
 * ConjQuery: same shape again, repeating ModClause and wrapping multiple results
 * in an AndQueryNode.
 * NOTE(review): no separator token appears before the repeated ModClause in this
 * copy; the grammar sketch in this file's comments shows AND there. Confirm.
 */
QueryNode ConjQuery(CharSequence field) : { QueryNode first, c; Vector clauses = null; } { first = ModClause(field) ( c=ModClause(field) { if (clauses == null) { clauses = new Vector(); clauses.addElement(first); } clauses.addElement(c); } )* { if (clauses != null) { return new AndQueryNode(clauses); } else { return first; } } }
/*
 * What follows is a commented-out earlier version of Query(), and then the start
 * of ModClause(). ModClause reads a modifier via Modifiers(), parses a Clause, and
 * wraps the clause in a ModifierQueryNode unless the modifier is MOD_NONE.
 * NOTE(review): in this collapsed layout the ModClause header sits on the same
 * physical line as the "//" comments before it; restore the line breaks before
 * regenerating the parser.
 */
// QueryNode Query(CharSequence field) : // { // List clauses = new ArrayList(); // List modifiers = new ArrayList(); // QueryNode q, firstQuery=null; // ModifierQueryNode.Modifier mods; // int conj; // } // { // mods=Modifiers() q=Clause(field) // { // if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q; // // // do not create modifier nodes with MOD_NONE // if (mods != ModifierQueryNode.Modifier.MOD_NONE) { // q = new ModifierQueryNode(q, mods); // } // clauses.add(q); // } // ( // conj=Conjunction() mods=Modifiers() q=Clause(field) // { // // do not create modifier nodes with MOD_NONE // if (mods != ModifierQueryNode.Modifier.MOD_NONE) { // q = new ModifierQueryNode(q, mods); // } // clauses.add(q); // //TODO: figure out what to do with AND and ORs // } // )* // { // if (clauses.size() == 1 && firstQuery != null) // return firstQuery; // else { // return new BooleanQueryNode(clauses); // } // } // } QueryNode ModClause(CharSequence field) : { QueryNode q; 
ModifierQueryNode.Modifier mods; } { mods=Modifiers() q= Clause(field) { if (mods != ModifierQueryNode.Modifier.MOD_NONE) { q = new ModifierQueryNode(q, mods); } return q; } }
/*
 * Clause: after a two-token lookahead, optionally reads a field token whose
 * unescaped image (EscapeQuerySyntaxImpl.discardEscapeChar) replaces `field` for
 * this clause. It then parses either a Term or a nested Query. For the nested
 * Query, `group` is set and the result is wrapped in a GroupQueryNode. If a boost
 * token was captured and q is non-null, q is wrapped in a BoostQueryNode first; a
 * boost image that fails to parse as a float is ignored.
 * NOTE(review): the token names after "fieldToken=" and "boost=", and any
 * parenthesis tokens around the nested Query, are missing in this copy.
 *
 * Term (defined after Clause) has four alternatives:
 *  - a plain term, built as a FieldQueryNode from the unescaped image. When a
 *    fuzzy-slop token was seen it is replaced by a FuzzyQueryNode. The similarity
 *    is parsed from the slop image after its first character and stays at 0.5f
 *    when that fails; a value outside [0.0, 1.0] throws a ParseException with
 *    INVALID_SYNTAX_FUZZY_LIMITS. The second sub-alternative ("term=" with no
 *    action) assigns no node, so q stays null unless the fuzzy branch runs.
 *  - a range whose bounds are checked against RANGEIN_QUOTED: quoted bounds lose
 *    their first and last character, and the bounds become GE / LE
 *    ParametricQueryNodes inside a ParametricRangeQueryNode.
 *  - a range whose bounds are checked against RANGEEX_QUOTED: same handling, but
 *    with GT / LT operators.
 *  - a quoted term, built as a QuotedFieldQueryNode from the image without its
 *    first and last character. When a slop token was seen it is wrapped in a
 *    SlopQueryNode using the integer part of the parsed slop; an unparsable slop
 *    is ignored.
 * Finally, when a boost token was captured and q is non-null, q is wrapped in a
 * BoostQueryNode; an unparsable boost is ignored.
 */
QueryNode Clause(CharSequence field) : { QueryNode q; Token fieldToken=null, boost=null; boolean group = false; } { [ LOOKAHEAD(2) ( fieldToken= {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} ) ] ( q=Term(field) | q=Query(field) ( boost=)? {group=true;} ) { if (boost != null) { float f = (float)1.0; try { f = Float.valueOf(boost.image).floatValue(); // avoid boosting null queries, such as those caused by stop words if (q != null) { q = new BoostQueryNode(q, f); } } catch (Exception ignored) { /* Should this be handled somehow? (defaults to "no boost", if * boost number is invalid) */ } } if (group) { q = new GroupQueryNode(q);} return q; } } QueryNode Term(CharSequence field) : { Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean fuzzy = false; QueryNode q =null; ParametricQueryNode qLower, qUpper; float defaultMinSimilarity = 0.5f; } { ( ( term= { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); } | term= ) [ fuzzySlop= { fuzzy=true; } ] [ boost= [ fuzzySlop= { fuzzy=true; } ] ] { if (fuzzy) { float fms = defaultMinSimilarity; try { fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); } catch (Exception ignored) { } if(fms < 0.0f || fms > 1.0f){ throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS)); } q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); } } | ( ( goop1=|goop1= ) [ ] ( goop2=|goop2= ) ) [ boost= ] { if (goop1.kind == RANGEIN_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); } if (goop2.kind == RANGEIN_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); } qLower = new ParametricQueryNode(field, 
ParametricQueryNode.CompareOperator.GE, EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LE, EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); q = new ParametricRangeQueryNode(qLower, qUpper); } | ( ( goop1=|goop1= ) [ ] ( goop2=|goop2= ) ) [ boost= ] { if (goop1.kind == RANGEEX_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); } if (goop2.kind == RANGEEX_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); } qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT, EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT, EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); q = new ParametricRangeQueryNode(qLower, qUpper); } | term= {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);} [ fuzzySlop= ] [ boost= ] { int phraseSlop = 0; if (fuzzySlop != null) { try { phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); q = new SlopQueryNode(q, phraseSlop); } catch (Exception ignored) { /* Should this be handled somehow? (defaults to "no PhraseSlop", if * slop number is invalid) */ } } } ) { if (boost != null) { float f = (float)1.0; try { f = Float.valueOf(boost.image).floatValue(); // avoid boosting null queries, such as those caused by stop words if (q != null) { q = new BoostQueryNode(q, f); } } catch (Exception ignored) { /* Should this be handled somehow? (defaults to "no boost", if * boost number is invalid) */ } } return q; } }