+++ /dev/null
-/**
- * Standard file is based on the TextParser.jj from lucene 2.3
- */
-
-options {  // JavaCC generator options for this grammar
- STATIC=false;  // generate a non-static parser (parse() below calls ReInit on an instance)
- JAVA_UNICODE_ESCAPE=true;  // JavaCC option: process Java unicode escapes in the input stream
- USER_CHAR_STREAM=false;  // JavaCC option: use the generated character-stream class
- IGNORE_CASE=false;  // keyword tokens such as AND, OR, NOT, TO are matched case-sensitively
- JDK_VERSION="1.5";  // target JDK level for the generated sources
-}
-
-PARSER_BEGIN(StandardSyntaxParser)
-package org.apache.lucene.queryParser.standard.parser;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Vector;
-
-import org.apache.lucene.messages.Message;
-import org.apache.lucene.messages.MessageImpl;
-import org.apache.lucene.queryParser.core.QueryNodeError;
-import org.apache.lucene.queryParser.core.QueryNodeException;
-import org.apache.lucene.queryParser.core.QueryNodeParseException;
-import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
-import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
-import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
-import org.apache.lucene.queryParser.core.nodes.BoostQueryNode;
-import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
-import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
-import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
-import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
-import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
-import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
-import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
-import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
-import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
-import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
-import org.apache.lucene.queryParser.core.nodes.QueryNode;
-import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
-import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
-import org.apache.lucene.queryParser.core.parser.SyntaxParser;
-
-/**
- * Syntax parser for the standard query syntax. The grammar productions that
- * follow PARSER_END are generated into this class.
- */
-public class StandardSyntaxParser implements SyntaxParser {
-
-  // Values returned by Conjunction(). They must be distinct, otherwise a
-  // caller cannot tell an AND from an OR.
-  private static final int CONJ_NONE = 0;
-  private static final int CONJ_AND = 1;
-  private static final int CONJ_OR = 2;
-
-  /**
-   * Creates a parser over an empty input. The real input is supplied per call
-   * by {@link #parse(CharSequence, CharSequence)}, which re-initializes the parser.
-   */
-  public StandardSyntaxParser() {
-    this(new StringReader(""));
-  }
-
-  /**
-   * Parses a query string, returning a {@link org.apache.lucene.queryParser.core.nodes.QueryNode}.
-   *
-   * @param query the query string to be parsed.
-   * @param field the field handed to the top-level production; clauses that
-   *        carry their own <code>field:</code> prefix replace it for that clause.
-   * @return the root node produced by the TopLevelQuery production
-   * @throws QueryNodeParseException if the parsing fails
-   */
-  public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
-    ReInit(new StringReader(query.toString()));
-    try {
-      // TopLevelQuery is a Query followed by the end-of-input (EOF)
-      QueryNode querynode = TopLevelQuery(field);
-      return querynode;
-    }
-    catch (ParseException tme) {
-      // attach the offending query text before propagating the parser's own exception
-      tme.setQuery(query);
-      throw tme;
-    }
-    catch (Error tme) {
-      // Errors raised while parsing are converted into a QueryNodeParseException
-      // that keeps the Error as its cause and carries the query text.
-      Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage());
-      QueryNodeParseException e = new QueryNodeParseException(tme);
-      e.setQuery(query);
-      e.setNonLocalizedMessage(message);
-      throw e;
-    }
-  }
-
-}
-
-PARSER_END(StandardSyntaxParser)
-
-/* ***************** */
-/* Token Definitions */
-/* ***************** */
-
-<*> TOKEN : {  // private (#) helper character classes, declared for all lexical states
- <#_NUM_CHAR: ["0"-"9"] >  // one ASCII digit
-// every character that follows a backslash is considered as an escaped character
-| <#_ESCAPED_CHAR: "\\" ~[] >
-| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",  // first char of a term: anything except
- "[", "]", "\"", "{", "}", "~", "\\" ]  // whitespace and the listed syntax characters ...
- | <_ESCAPED_CHAR> ) >  // ... or any backslash-escaped character
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >  // later term chars may also be "-" or "+"
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >  // space, tab, newline, carriage return, U+3000
-| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >  // content of a quoted phrase: no bare quote or backslash
-}
-
-<DEFAULT, RangeIn, RangeEx> SKIP : {  // whitespace is skipped in these three states (Boost is not listed)
- < <_WHITESPACE>>
-}
-
-<DEFAULT> TOKEN : {  // tokens recognized in the DEFAULT lexical state
- <AND: ("AND" | "&&") >
-| <OR: ("OR" | "||") >
-| <NOT: ("NOT" | "!") >
-| <PLUS: "+" >
-| <MINUS: "-" >
-| <LPAREN: "(" >
-| <RPAREN: ")" >
-| <COLON: ":" >
-| <CARAT: "^" > : Boost  // after "^" the lexer moves to the Boost state, where only NUMBER is defined
-| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">  // double-quoted phrase, quotes included in the image
-| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
-| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >  // "~" alone or "~" followed by a decimal number
-| <RANGEIN_START: "[" > : RangeIn  // "[" opens an inclusive range and switches to RangeIn
-| <RANGEEX_START: "{" > : RangeEx  // "{" opens an exclusive range and switches to RangeEx
-}
-
-<Boost> TOKEN : {  // the only token of the Boost state
-<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT  // digits with optional fraction; lexer returns to DEFAULT
-}
-
-<RangeIn> TOKEN : {  // tokens inside an inclusive "[ ... ]" range
-<RANGEIN_TO: "TO">
-| <RANGEIN_END: "]"> : DEFAULT  // closing bracket returns the lexer to DEFAULT
-| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">  // quoted bound; a backslash-quote pair is allowed inside
-| <RANGEIN_GOOP: (~[ " ", "]" ])+ >  // unquoted bound: any run of chars other than space and "]"
-}
-
-<RangeEx> TOKEN : {  // tokens inside an exclusive "{ ... }" range
-<RANGEEX_TO: "TO">
-| <RANGEEX_END: "}"> : DEFAULT  // closing brace returns the lexer to DEFAULT
-| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">  // quoted bound; a backslash-quote pair is allowed inside
-| <RANGEEX_GOOP: (~[ " ", "}" ])+ >  // unquoted bound: any run of chars other than space and "}"
-}
-
-// * Query ::= ( Clause )*
-// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
-
-// Consumes an optional AND / OR token and reports which one was seen
-// (CONJ_NONE when neither is present).
-int Conjunction() : {
-  int conj = CONJ_NONE;
-}
-{
-  (
-    <AND> { conj = CONJ_AND; }
-  | <OR>  { conj = CONJ_OR; }
-  )?
-  { return conj; }
-}
-
-// Consumes an optional leading modifier token and maps it to a Modifier:
-// PLUS -> MOD_REQ, MINUS and NOT -> MOD_NOT, nothing -> MOD_NONE.
-ModifierQueryNode.Modifier Modifiers() : {
-  ModifierQueryNode.Modifier modifier = ModifierQueryNode.Modifier.MOD_NONE;
-}
-{
-  (
-    <PLUS>  { modifier = ModifierQueryNode.Modifier.MOD_REQ; }
-  | <MINUS> { modifier = ModifierQueryNode.Modifier.MOD_NOT; }
-  | <NOT>   { modifier = ModifierQueryNode.Modifier.MOD_NOT; }
-  )?
-  { return modifier; }
-}
-
-// Entry production: a Query that must be followed directly by end-of-input,
-// so that no garbage is accepted after the query string.
-QueryNode TopLevelQuery(CharSequence field) :
-{
-  QueryNode root;
-}
-{
-  root=Query(field) <EOF>
-  { return root; }
-}
-
-// These changes were made to introduce operator precedence:
-// - Clause() now returns a QueryNode.
-// - The modifiers are consumed by ModClause() and returned as part of the QueryNode Object
-// - Query does not consume conjunctions (AND, OR) anymore.
-// - This is now done by two new non-terminals: ConjQuery and DisjQuery
-// The parse tree looks similar to this:
-// Query ::= DisjQuery ( DisjQuery )*
-// DisjQuery ::= ConjQuery ( OR ConjQuery )*
-// ConjQuery ::= ModClause ( AND ModClause )*
-// ModClause ::= [ Modifier ] Clause
-
-
-/**
- * Query ::= DisjQuery ( DisjQuery )*
- *
- * A single sub-query is returned as is; two or more juxtaposed sub-queries
- * are collected, in input order, into a BooleanQueryNode.
- */
-QueryNode Query(CharSequence field) :
-{
-  // allocated lazily: most queries consist of a single sub-query
-  List<QueryNode> clauses = null;
-  QueryNode first, c;
-}
-{
-  first=DisjQuery(field)
-  (
-    c=DisjQuery(field)
-    {
-      if (clauses == null) {
-        clauses = new ArrayList<QueryNode>();
-        clauses.add(first);
-      }
-      clauses.add(c);
-    }
-  )*
-  {
-    if (clauses != null) {
-      return new BooleanQueryNode(clauses);
-    } else {
-      return first;
-    }
-  }
-}
-
-/**
- * DisjQuery ::= ConjQuery ( OR ConjQuery )*
- *
- * A single operand is returned as is; operands joined by OR are collected,
- * in input order, into an OrQueryNode.
- */
-QueryNode DisjQuery(CharSequence field) : {
-  QueryNode first, c;
-  // allocated lazily: only needed once an OR has been seen
-  List<QueryNode> clauses = null;
-}
-{
-  first = ConjQuery(field)
-  (
-    <OR> c=ConjQuery(field)
-    {
-      if (clauses == null) {
-        clauses = new ArrayList<QueryNode>();
-        clauses.add(first);
-      }
-      clauses.add(c);
-    }
-  )*
-  {
-    if (clauses != null) {
-      return new OrQueryNode(clauses);
-    } else {
-      return first;
-    }
-  }
-}
-
-/**
- * ConjQuery ::= ModClause ( AND ModClause )*
- *
- * A single operand is returned as is; operands joined by AND are collected,
- * in input order, into an AndQueryNode.
- */
-QueryNode ConjQuery(CharSequence field) : {
-  QueryNode first, c;
-  // allocated lazily: only needed once an AND has been seen
-  List<QueryNode> clauses = null;
-}
-{
-  first = ModClause(field)
-  (
-    <AND> c=ModClause(field)
-    {
-      if (clauses == null) {
-        clauses = new ArrayList<QueryNode>();
-        clauses.add(first);
-      }
-      clauses.add(c);
-    }
-  )*
-  {
-    if (clauses != null) {
-      return new AndQueryNode(clauses);
-    } else {
-      return first;
-    }
-  }
-}
-
-// QueryNode Query(CharSequence field) :
-// {
-// List clauses = new ArrayList();
-// List modifiers = new ArrayList();
-// QueryNode q, firstQuery=null;
-// ModifierQueryNode.Modifier mods;
-// int conj;
-// }
-// {
-// mods=Modifiers() q=Clause(field)
-// {
-// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q;
-//
-// // do not create modifier nodes with MOD_NONE
-// if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
-// q = new ModifierQueryNode(q, mods);
-// }
-// clauses.add(q);
-// }
-// (
-// conj=Conjunction() mods=Modifiers() q=Clause(field)
-// {
-// // do not create modifier nodes with MOD_NONE
-// if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
-// q = new ModifierQueryNode(q, mods);
-// }
-// clauses.add(q);
-// //TODO: figure out what to do with AND and ORs
-// }
-// )*
-// {
-// if (clauses.size() == 1 && firstQuery != null)
-// return firstQuery;
-// else {
-// return new BooleanQueryNode(clauses);
-// }
-// }
-// }
-
-// ModClause ::= Modifiers Clause
-// Wraps the clause in a ModifierQueryNode unless no modifier was given.
-QueryNode ModClause(CharSequence field) : {
-  ModifierQueryNode.Modifier modifier;
-  QueryNode clause;
-}
-{
-  modifier=Modifiers() clause=Clause(field)
-  {
-    // do not create modifier nodes with MOD_NONE
-    if (modifier == ModifierQueryNode.Modifier.MOD_NONE) {
-      return clause;
-    }
-    return new ModifierQueryNode(clause, modifier);
-  }
-}
-
-// Clause ::= [ <TERM> ":" ] ( Term | "(" Query ")" [ "^" <NUMBER> ] )
-// An explicit "name:" prefix replaces the incoming field for this clause.
-// A parenthesized query is wrapped in a GroupQueryNode, after any boost.
-QueryNode Clause(CharSequence field) : {
-  QueryNode node;
-  Token fieldName=null, boostToken=null;
-  boolean grouped = false;
-}
-{
-  [
-    LOOKAHEAD(2)
-    (
-      fieldName=<TERM> <COLON>
-      { field = EscapeQuerySyntaxImpl.discardEscapeChar(fieldName.image); }
-    )
-  ]
-
-  (
-    node=Term(field)
-  | <LPAREN> node=Query(field) <RPAREN> [ <CARAT> boostToken=<NUMBER> ] { grouped = true; }
-  )
-  {
-    if (boostToken != null) {
-      try {
-        float factor = Float.parseFloat(boostToken.image);
-        // avoid boosting null queries, such as those caused by stop words
-        if (node != null) {
-          node = new BoostQueryNode(node, factor);
-        }
-      } catch (Exception ignored) {
-        /* Should this be handled somehow? (defaults to "no boost", if
-         * boost number is invalid)
-         */
-      }
-    }
-    if (grouped) {
-      node = new GroupQueryNode(node);
-    }
-    return node;
-  }
-}
-
-
-/**
- * Parses one term-level construct for the given field and returns its node:
- *   - a plain term (TERM or NUMBER token), optionally fuzzy ("~") and/or boosted ("^n");
- *   - an inclusive range "[a TO b]" or an exclusive range "{a TO b}", optionally boosted;
- *   - a quoted phrase, optionally with a slop ("~n") and/or a boost.
- * When a boost is present the resulting node is wrapped in a BoostQueryNode.
- * Throws ParseException when a fuzzy similarity lies outside [0, 1].
- */
-QueryNode Term(CharSequence field) : {
-  Token term, boost=null, fuzzySlop=null, goop1, goop2;
-  boolean fuzzy = false;
-  QueryNode q = null;
-  ParametricQueryNode qLower, qUpper;
-  float defaultMinSimilarity = 0.5f;
-}
-{
-  (
-    (
-      term=<TERM>
-      | term=<NUMBER>
-    )
-    {
-      // Build the field node for both token kinds. Previously only TERM did so,
-      // which left q null (and dropped any boost) when the term was a NUMBER.
-      q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
-    }
-    [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
-    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
-    {
-      if (fuzzy) {
-        float fms = defaultMinSimilarity;
-        try {
-          // image is "~" followed by an optional number; strip the "~"
-          fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
-        } catch (Exception ignored) {
-          // a bare "~" carries no number: keep the default similarity
-        }
-        if(fms < 0.0f || fms > 1.0f){
-          throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
-        }
-        q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
-      }
-    }
-  | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
-      [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
-      <RANGEIN_END> )
-    [ <CARAT> boost=<NUMBER> ]
-    {
-      // quoted bounds: drop the surrounding double quotes
-      if (goop1.kind == RANGEIN_QUOTED) {
-        goop1.image = goop1.image.substring(1, goop1.image.length()-1);
-      }
-      if (goop2.kind == RANGEIN_QUOTED) {
-        goop2.image = goop2.image.substring(1, goop2.image.length()-1);
-      }
-
-      // inclusive range: lower bound uses GE, upper bound uses LE
-      qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GE,
-                                       EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
-      qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LE,
-                                       EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
-      q = new ParametricRangeQueryNode(qLower, qUpper);
-    }
-  | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
-      [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
-      <RANGEEX_END> )
-    [ <CARAT> boost=<NUMBER> ]
-    {
-      // quoted bounds: drop the surrounding double quotes
-      if (goop1.kind == RANGEEX_QUOTED) {
-        goop1.image = goop1.image.substring(1, goop1.image.length()-1);
-      }
-      if (goop2.kind == RANGEEX_QUOTED) {
-        goop2.image = goop2.image.substring(1, goop2.image.length()-1);
-      }
-      // exclusive range: lower bound uses GT, upper bound uses LT
-      qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT,
-                                       EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
-      qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT,
-                                       EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
-      q = new ParametricRangeQueryNode(qLower, qUpper);
-    }
-  | term=<QUOTED> {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);}
-    [ fuzzySlop=<FUZZY_SLOP> ]
-    [ <CARAT> boost=<NUMBER> ]
-    {
-      int phraseSlop = 0;
-
-      if (fuzzySlop != null) {
-        try {
-          // for a phrase the number after "~" is a slop, truncated to an int
-          phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
-          q = new SlopQueryNode(q, phraseSlop);
-        }
-        catch (Exception ignored) {
-          /* Should this be handled somehow? (defaults to "no PhraseSlop", if
-           * slop number is invalid)
-           */
-        }
-      }
-
-    }
-  )
-  {
-    if (boost != null) {
-      float f = (float)1.0;
-      try {
-        f = Float.valueOf(boost.image).floatValue();
-        // avoid boosting null queries, such as those caused by stop words
-        if (q != null) {
-          q = new BoostQueryNode(q, f);
-        }
-      } catch (Exception ignored) {
-        /* Should this be handled somehow? (defaults to "no boost", if
-         * boost number is invalid)
-         */
-      }
-    }
-    return q;
-  }
-}