--- /dev/null
+/**
+ * Standard file is based on the TextParser.jj from lucene 2.3
+ */
+
+// JavaCC generator options for this grammar.
+options {
+ // generate instance (non-static) parser members, so several parsers can coexist
+ STATIC=false;
+ // let the generated character stream decode Java-style \uXXXX escapes in the input
+ JAVA_UNICODE_ESCAPE=true;
+ // use the character stream class generated by JavaCC rather than a user-supplied one
+ USER_CHAR_STREAM=false;
+ // token literals are matched case-sensitively ("AND" is an operator, "and" is not)
+ IGNORE_CASE=false;
+ // target Java 5 for the generated sources (the grammar actions use generics)
+ JDK_VERSION="1.5";
+}
+
+PARSER_BEGIN(StandardSyntaxParser)
+package org.apache.lucene.queryParser.standard.parser;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.lucene.messages.Message;
+import org.apache.lucene.messages.MessageImpl;
+import org.apache.lucene.queryParser.core.QueryNodeError;
+import org.apache.lucene.queryParser.core.QueryNodeException;
+import org.apache.lucene.queryParser.core.QueryNodeParseException;
+import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
+import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
+import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
+import org.apache.lucene.queryParser.core.nodes.BoostQueryNode;
+import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
+import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
+import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
+import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
+import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
+import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
+import org.apache.lucene.queryParser.core.nodes.QueryNode;
+import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
+import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
+import org.apache.lucene.queryParser.core.parser.SyntaxParser;
+
+/**
+ * Syntax parser for the standard query syntax. The parsing methods themselves
+ * are generated by JavaCC from the productions declared in this grammar file.
+ */
+public class StandardSyntaxParser implements SyntaxParser {
+
+  // Codes returned by the Conjunction() production. They must be pairwise
+  // distinct, otherwise AND and OR cannot be told apart by a caller.
+  private static final int CONJ_NONE = 0;
+  private static final int CONJ_AND = 1;
+  private static final int CONJ_OR = 2;
+
+
+  /**
+   * Creates a parser over an empty input. The real input is supplied by each
+   * call to {@link #parse(CharSequence, CharSequence)}, which re-initializes
+   * the parser.
+   */
+  public StandardSyntaxParser() {
+    this(new StringReader(""));
+  }
+
+  /**
+   * Parses a query string, returning a
+   * {@link org.apache.lucene.queryParser.core.nodes.QueryNode}.
+   *
+   * @param query the query string to be parsed.
+   * @param field the field name handed to the grammar as the starting field;
+   *        a <code>name:</code> prefix inside the query replaces it for that clause.
+   * @return the root of the parsed query node tree.
+   * @throws QueryNodeParseException if the parsing fails; the offending query
+   *         is attached to the exception.
+   */
+  public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
+    ReInit(new StringReader(query.toString()));
+    try {
+      // TopLevelQuery is a Query followed by the end-of-input (EOF)
+      return TopLevelQuery(field);
+    }
+    catch (ParseException tme) {
+      // grammar-level failure: attach the query text and rethrow as-is
+      tme.setQuery(query);
+      throw tme;
+    }
+    catch (Error tme) {
+      // Errors raised while parsing (presumably the generated token manager's
+      // lexical errors - confirm) are converted into a parse exception that
+      // keeps the original error as its cause.
+      Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage());
+      QueryNodeParseException e = new QueryNodeParseException(tme);
+      e.setQuery(query);
+      e.setNonLocalizedMessage(message);
+      throw e;
+    }
+  }
+
+}
+
+PARSER_END(StandardSyntaxParser)
+
+/* ***************** */
+/* Token Definitions */
+/* ***************** */
+
+// Private (#) helper expressions, declared for every lexical state (<*>).
+// They are building blocks for the token definitions below and are never
+// returned as tokens themselves.
+<*> TOKEN : {
+ <#_NUM_CHAR: ["0"-"9"] >
+// every character that follows a backslash is considered as an escaped character
+| <#_ESCAPED_CHAR: "\\" ~[] >
+// first character of a term: any character except whitespace and the listed
+// syntax characters, or else a backslash-escaped character
+| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
+ "[", "]", "\"", "{", "}", "~", "\\" ]
+ | <_ESCAPED_CHAR> ) >
+// following characters of a term: as above, plus unescaped "-" and "+"
+| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
+// space, tab, line feed, carriage return and U+3000 (ideographic space)
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
+// one character inside a quoted phrase: anything but a bare quote or
+// backslash, or else a backslash-escaped character
+| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
+}
+
+// Whitespace is discarded in the DEFAULT, RangeIn and RangeEx states.
+// The Boost state is not listed here, so whitespace is not skipped between
+// "^" and the boost number.
+<DEFAULT, RangeIn, RangeEx> SKIP : {
+ < <_WHITESPACE>>
+}
+
+// Tokens of the DEFAULT lexical state: operators, grouping, terms, and the
+// openers that switch the lexer into the Boost, RangeIn and RangeEx states.
+// The operator keywords are declared before TERM; when two definitions match
+// the same text JavaCC picks the one declared first, so "AND", "OR" and "NOT"
+// are operators rather than terms.
+<DEFAULT> TOKEN : {
+ <AND: ("AND" | "&&") >
+| <OR: ("OR" | "||") >
+| <NOT: ("NOT" | "!") >
+| <PLUS: "+" >
+| <MINUS: "-" >
+| <LPAREN: "(" >
+| <RPAREN: ")" >
+| <COLON: ":" >
+// "^" introduces a boost; the lexer moves to the Boost state to read the number
+| <CARAT: "^" > : Boost
+// double-quoted phrase; the token image includes both quote characters
+| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
+| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
+// "~" optionally followed by an integer or decimal number
+| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+// "[" and "{" open a range expression; each switches to its own lexical state
+| <RANGEIN_START: "[" > : RangeIn
+| <RANGEEX_START: "{" > : RangeEx
+}
+
+// Boost state (entered after "^"): the only token is an integer or decimal
+// number, after which the lexer returns to the DEFAULT state.
+<Boost> TOKEN : {
+<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+}
+
+// RangeIn state (entered after "["): bounds of a "[" ... "]" range.
+<RangeIn> TOKEN : {
+<RANGEIN_TO: "TO">
+// "]" closes the range and returns the lexer to the DEFAULT state
+| <RANGEIN_END: "]"> : DEFAULT
+// double-quoted bound with at least one character between the quotes;
+// a backslash-quote pair (\") is accepted inside
+| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
+// unquoted bound: one or more characters other than a space or "]"
+| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
+}
+
+// RangeEx state (entered after "{"): bounds of a "{" ... "}" range.
+<RangeEx> TOKEN : {
+<RANGEEX_TO: "TO">
+// "}" closes the range and returns the lexer to the DEFAULT state
+| <RANGEEX_END: "}"> : DEFAULT
+// double-quoted bound with at least one character between the quotes;
+// a backslash-quote pair (\") is accepted inside
+| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
+// unquoted bound: one or more characters other than a space or "}"
+| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
+}
+
+// * Query ::= ( Clause )*
+// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+// Consumes an optional AND / OR operator and returns the matching CONJ_* code,
+// or CONJ_NONE when the next token is neither.
+int Conjunction() : {
+  int conj = CONJ_NONE;
+}
+{
+  (
+      <AND> { conj = CONJ_AND; }
+    | <OR>  { conj = CONJ_OR; }
+  )?
+  { return conj; }
+}
+
+// Consumes an optional clause modifier: "+" yields MOD_REQ, "-" and NOT both
+// yield MOD_NOT, and MOD_NONE is returned when no modifier is present.
+ModifierQueryNode.Modifier Modifiers() : {
+  ModifierQueryNode.Modifier modifier = ModifierQueryNode.Modifier.MOD_NONE;
+}
+{
+  (
+      <PLUS>  { modifier = ModifierQueryNode.Modifier.MOD_REQ; }
+    | <MINUS> { modifier = ModifierQueryNode.Modifier.MOD_NOT; }
+    | <NOT>   { modifier = ModifierQueryNode.Modifier.MOD_NOT; }
+  )?
+  { return modifier; }
+}
+
+// Entry production: a complete Query that must be followed by end of input,
+// so any garbage after the query string is a parse error.
+QueryNode TopLevelQuery(CharSequence field) :
+{
+  QueryNode root;
+}
+{
+  root=Query(field) <EOF>
+  { return root; }
+}
+
+// These changes were made to introduce operator precedence:
+// - Clause() now returns a QueryNode.
+// - The modifiers are consumed by ModClause() and returned as part of the QueryNode object.
+// - Query does not consume conjunctions (AND, OR) anymore.
+// - This is now done by two new non-terminals: ConjQuery and DisjQuery.
+// The parse tree looks similar to this:
+// Query ::= DisjQuery ( DisjQuery )*
+// DisjQuery ::= ConjQuery ( OR ConjQuery )*
+// ConjQuery ::= ModClause ( AND ModClause )*
+// ModClause ::= [ Modifier ] Clause
+
+
+// A sequence of one or more DisjQuery groups written without an explicit
+// operator between them. A single group is returned unchanged; two or more
+// are collected, in input order, into a BooleanQueryNode.
+QueryNode Query(CharSequence field) :
+{
+  List<QueryNode> clauses = null;
+  QueryNode c, first;
+}
+{
+  first=DisjQuery(field)
+  (
+    c=DisjQuery(field)
+    {
+      // the list is created lazily so the single-clause case allocates nothing
+      if (clauses == null) {
+        clauses = new ArrayList<QueryNode>();
+        clauses.add(first);
+      }
+      clauses.add(c);
+    }
+  )*
+  {
+    if (clauses != null) {
+      return new BooleanQueryNode(clauses);
+    } else {
+      return first;
+    }
+  }
+}
+
+// One or more ConjQuery groups separated by OR. A single group is returned
+// unchanged; two or more are collected, in input order, into an OrQueryNode.
+QueryNode DisjQuery(CharSequence field) : {
+  QueryNode first, c;
+  List<QueryNode> clauses = null;
+}
+{
+  first = ConjQuery(field)
+  (
+    <OR> c=ConjQuery(field)
+    {
+      // the list is created lazily so the single-clause case allocates nothing
+      if (clauses == null) {
+        clauses = new ArrayList<QueryNode>();
+        clauses.add(first);
+      }
+      clauses.add(c);
+    }
+  )*
+  {
+    if (clauses != null) {
+      return new OrQueryNode(clauses);
+    } else {
+      return first;
+    }
+  }
+}
+
+// One or more ModClause items separated by AND. A single clause is returned
+// unchanged; two or more are collected, in input order, into an AndQueryNode.
+QueryNode ConjQuery(CharSequence field) : {
+  QueryNode first, c;
+  List<QueryNode> clauses = null;
+}
+{
+  first = ModClause(field)
+  (
+    <AND> c=ModClause(field)
+    {
+      // the list is created lazily so the single-clause case allocates nothing
+      if (clauses == null) {
+        clauses = new ArrayList<QueryNode>();
+        clauses.add(first);
+      }
+      clauses.add(c);
+    }
+  )*
+  {
+    if (clauses != null) {
+      return new AndQueryNode(clauses);
+    } else {
+      return first;
+    }
+  }
+}
+
+// QueryNode Query(CharSequence field) :
+// {
+// List clauses = new ArrayList();
+// List modifiers = new ArrayList();
+// QueryNode q, firstQuery=null;
+// ModifierQueryNode.Modifier mods;
+// int conj;
+// }
+// {
+// mods=Modifiers() q=Clause(field)
+// {
+// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q;
+//
+// // do not create modifier nodes with MOD_NONE
+// if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
+// q = new ModifierQueryNode(q, mods);
+// }
+// clauses.add(q);
+// }
+// (
+// conj=Conjunction() mods=Modifiers() q=Clause(field)
+// {
+// // do not create modifier nodes with MOD_NONE
+// if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
+// q = new ModifierQueryNode(q, mods);
+// }
+// clauses.add(q);
+// //TODO: figure out what to do with AND and ORs
+// }
+// )*
+// {
+// if (clauses.size() == 1 && firstQuery != null)
+// return firstQuery;
+// else {
+// return new BooleanQueryNode(clauses);
+// }
+// }
+// }
+
+// A clause together with its optional leading modifier. The clause is wrapped
+// in a ModifierQueryNode only when a modifier was actually present.
+QueryNode ModClause(CharSequence field) : {
+  QueryNode clause;
+  ModifierQueryNode.Modifier modifier;
+}
+{
+  modifier=Modifiers() clause=Clause(field)
+  {
+    // do not create modifier nodes with MOD_NONE
+    if (modifier == ModifierQueryNode.Modifier.MOD_NONE) {
+      return clause;
+    }
+    return new ModifierQueryNode(clause, modifier);
+  }
+}
+
+// A single clause: an optional "field:" prefix followed by either a Term or a
+// parenthesized sub-query with an optional boost. A parenthesized sub-query
+// is wrapped in a GroupQueryNode after any boost has been applied.
+QueryNode Clause(CharSequence field) : {
+  QueryNode node;
+  Token fieldName=null, boostToken=null;
+  boolean grouped = false;
+}
+{
+  // two tokens of lookahead are needed to tell "name:" apart from a plain term
+  [
+    LOOKAHEAD(2)
+    fieldName=<TERM> <COLON> {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldName.image);}
+  ]
+
+  (
+      node=Term(field)
+    | <LPAREN> node=Query(field) <RPAREN> [ <CARAT> boostToken=<NUMBER> ] {grouped=true;}
+  )
+  {
+    if (boostToken != null) {
+      try {
+        float factor = Float.parseFloat(boostToken.image);
+        // avoid boosting null queries, such as those caused by stop words
+        if (node != null) {
+          node = new BoostQueryNode(node, factor);
+        }
+      } catch (Exception ignored) {
+        /* Should this be handled somehow? (defaults to "no boost", if
+         * boost number is invalid)
+         */
+      }
+    }
+    if (grouped) {
+      node = new GroupQueryNode(node);
+    }
+    return node;
+  }
+}
+
+
+// Parses one term-level expression for the given field and returns its node.
+// Four alternatives are accepted: a plain term (optionally fuzzy), an
+// inclusive range "[a TO b]", an exclusive range "{a TO b}", and a quoted
+// phrase (optionally with slop). Each may be followed by "^" and a boost
+// number, in which case the result is wrapped in a BoostQueryNode.
+QueryNode Term(CharSequence field) : {
+ Token term, boost=null, fuzzySlop=null, goop1, goop2;
+ boolean fuzzy = false;
+ QueryNode q =null;
+ ParametricQueryNode qLower, qUpper;
+ // similarity used when "~" is given without a parsable number
+ float defaultMinSimilarity = 0.5f;
+}
+{
+ (
+ // --- plain term, optionally followed by a fuzzy marker and/or a boost ---
+ (
+ term=<TERM> { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
+ // NOTE(review): this alternative builds no node, so q stays null unless a
+ // fuzzy marker follows. NUMBER is only defined in the Boost lexical state,
+ // so confirm whether this branch can be reached at all.
+ | term=<NUMBER>
+ )
+ [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+ // the fuzzy marker is accepted either before or after the boost
+ [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
+ {
+ if (fuzzy) {
+ float fms = defaultMinSimilarity;
+ try {
+ // the text after "~" is the minimum similarity; a bare "~" or an
+ // unparsable value keeps the default
+ fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
+ } catch (Exception ignored) { }
+ // similarities outside [0, 1] are rejected
+ if(fms < 0.0f || fms > 1.0f){
+ throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
+ }
+ // replaces the plain field node built above
+ q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
+ }
+ }
+ // --- inclusive range: "[" lower [TO] upper "]" ---
+ | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
+ [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
+ <RANGEIN_END> )
+ [ <CARAT> boost=<NUMBER> ]
+ {
+ // strip the surrounding quotes from quoted bounds (the token image is modified in place)
+ if (goop1.kind == RANGEIN_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ }
+ if (goop2.kind == RANGEIN_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ }
+
+ // both bounds are included: lower uses GE, upper uses LE
+ qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GE,
+ EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
+ qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LE,
+ EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
+ q = new ParametricRangeQueryNode(qLower, qUpper);
+ }
+ // --- exclusive range: "{" lower [TO] upper "}" ---
+ | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
+ [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
+ <RANGEEX_END> )
+ [ <CARAT> boost=<NUMBER> ]
+ {
+ // strip the surrounding quotes from quoted bounds (the token image is modified in place)
+ if (goop1.kind == RANGEEX_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ }
+ if (goop2.kind == RANGEEX_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ }
+ // both bounds are excluded: lower uses GT, upper uses LT
+ qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT,
+ EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
+ qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT,
+ EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
+ q = new ParametricRangeQueryNode(qLower, qUpper);
+ }
+ // --- quoted phrase: the quotes are stripped from the text and the column
+ // positions are moved inward by one on each side ---
+ | term=<QUOTED> {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);}
+ [ fuzzySlop=<FUZZY_SLOP> ]
+ [ <CARAT> boost=<NUMBER> ]
+ {
+ int phraseSlop = 0;
+
+ if (fuzzySlop != null) {
+ try {
+ // after a phrase, the number following "~" is a slop; it is parsed as a
+ // float and truncated to int. A bare "~" fails to parse and adds no slop node.
+ phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
+ q = new SlopQueryNode(q, phraseSlop);
+ }
+ catch (Exception ignored) {
+ /* Should this be handled somehow? (defaults to "no PhraseSlop", if
+ * slop number is invalid)
+ */
+ }
+ }
+
+ }
+ )
+ {
+ // common tail for all alternatives: apply the boost, if one was given
+ if (boost != null) {
+ float f = (float)1.0;
+ try {
+ f = Float.valueOf(boost.image).floatValue();
+ // avoid boosting null queries, such as those caused by stop words
+ if (q != null) {
+ q = new BoostQueryNode(q, f);
+ }
+ } catch (Exception ignored) {
+ /* Should this be handled somehow? (defaults to "no boost", if
+ * boost number is invalid)
+ */
+ }
+ }
+ return q;
+ }
+}