X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj diff --git a/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj b/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj deleted file mode 100644 index bec992a..0000000 --- a/lucene-java-3.4.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj +++ /dev/null @@ -1,472 +0,0 @@ -/** - * Standard file is based on the TextParser.jj from lucene 2.3 - */ - -options { - STATIC=false; - JAVA_UNICODE_ESCAPE=true; - USER_CHAR_STREAM=false; - IGNORE_CASE=false; - JDK_VERSION="1.5"; -} - -PARSER_BEGIN(StandardSyntaxParser) -package org.apache.lucene.queryParser.standard.parser; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Vector; - -import org.apache.lucene.messages.Message; -import org.apache.lucene.messages.MessageImpl; -import org.apache.lucene.queryParser.core.QueryNodeError; -import org.apache.lucene.queryParser.core.QueryNodeException; -import org.apache.lucene.queryParser.core.QueryNodeParseException; -import org.apache.lucene.queryParser.core.messages.QueryParserMessages; -import org.apache.lucene.queryParser.core.nodes.AndQueryNode; -import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode; -import org.apache.lucene.queryParser.core.nodes.BoostQueryNode; -import org.apache.lucene.queryParser.core.nodes.FieldQueryNode; -import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode; -import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode; -import org.apache.lucene.queryParser.core.nodes.GroupQueryNode; -import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode; -import org.apache.lucene.queryParser.core.nodes.OrQueryNode; -import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; -import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; -import org.apache.lucene.queryParser.core.nodes.SlopQueryNode; -import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode; -import org.apache.lucene.queryParser.core.nodes.QueryNode; -import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl; -import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode; -import org.apache.lucene.queryParser.core.parser.SyntaxParser; - -public class StandardSyntaxParser implements SyntaxParser { - - private static final int CONJ_NONE =0; - private static final int CONJ_AND =2; - private static final int CONJ_OR =2; - - - // syntax parser constructor - public StandardSyntaxParser() { - this(new StringReader("")); - } - /** Parses a query string, returning a {@link org.apache.lucene.queryParser.core.nodes.QueryNode}. - * @param query the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { - ReInit(new StringReader(query.toString())); - try { - // TopLevelQuery is a Query followed by the end-of-input (EOF) - QueryNode querynode = TopLevelQuery(field); - return querynode; - } - catch (ParseException tme) { - tme.setQuery(query); - throw tme; - } - catch (Error tme) { - Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage()); - QueryNodeParseException e = new QueryNodeParseException(tme); - e.setQuery(query); - e.setNonLocalizedMessage(message); - throw e; - } - } - -} - -PARSER_END(StandardSyntaxParser) - -/* ***************** */ -/* Token Definitions */ -/* ***************** */ - -<*> TOKEN : { - <#_NUM_CHAR: ["0"-"9"] > -// every character that follows a backslash is considered as an escaped character -| <#_ESCAPED_CHAR: "\\" ~[] > -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "\\" ] - | <_ESCAPED_CHAR> ) > -| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > -| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > -| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > -} - - SKIP : { - < <_WHITESPACE>> -} - - TOKEN : { - -| -| -| -| -| -| -| -| : Boost -| )* "\""> -| (<_TERM_CHAR>)* > -| )+ ( "." (<_NUM_CHAR>)+ )? )? > -| : RangeIn -| : RangeEx -} - - TOKEN : { -)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT -} - - TOKEN : { - -| : DEFAULT -| -| -} - - TOKEN : { - -| : DEFAULT -| -| -} - -// * Query ::= ( Clause )* -// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) - -int Conjunction() : { - int ret = CONJ_NONE; -} -{ - [ - { ret = CONJ_AND; } - | { ret = CONJ_OR; } - ] - { return ret; } -} - -ModifierQueryNode.Modifier Modifiers() : { - ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE; -} -{ - [ - { ret = ModifierQueryNode.Modifier.MOD_REQ; } - | { ret = ModifierQueryNode.Modifier.MOD_NOT; } - | { ret = ModifierQueryNode.Modifier.MOD_NOT; } - ] - { return ret; } -} - -// This makes sure that there is no garbage after the query string -QueryNode TopLevelQuery(CharSequence field) : -{ - QueryNode q; -} -{ - q=Query(field) - { - return q; - } -} - -// These changes were made to introduce operator precedence: -// - Clause() now returns a QueryNode. -// - The modifiers are consumed by Clause() and returned as part of the QueryNode Object -// - Query does not consume conjunctions (AND, OR) anymore. -// - This is now done by two new non-terminals: ConjClause and DisjClause -// The parse tree looks similar to this: -// Query ::= DisjQuery ( DisjQuery )* -// DisjQuery ::= ConjQuery ( OR ConjQuery )* -// ConjQuery ::= Clause ( AND Clause )* -// Clause ::= [ Modifier ] ... - - -QueryNode Query(CharSequence field) : -{ - Vector clauses = null; - QueryNode c, first=null; -} -{ - first=DisjQuery(field) - ( - c=DisjQuery(field) - { - if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } - )* - { - if (clauses != null) { - return new BooleanQueryNode(clauses); - } else { - return first; - } - } -} - -QueryNode DisjQuery(CharSequence field) : { - QueryNode first, c; - Vector clauses = null; -} -{ - first = ConjQuery(field) - ( - c=ConjQuery(field) - { - if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } - )* - { - if (clauses != null) { - return new OrQueryNode(clauses); - } else { - return first; - } - } -} - -QueryNode ConjQuery(CharSequence field) : { - QueryNode first, c; - Vector clauses = null; -} -{ - first = ModClause(field) - ( - c=ModClause(field) - { - if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } - )* - { - if (clauses != null) { - return new AndQueryNode(clauses); - } else { - return first; - } - } -} - -// QueryNode Query(CharSequence field) : -// { -// List clauses = new ArrayList(); -// List modifiers = new ArrayList(); -// QueryNode q, firstQuery=null; -// ModifierQueryNode.Modifier mods; -// int conj; -// } -// { -// mods=Modifiers() q=Clause(field) -// { -// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q; -// -// // do not create modifier nodes with MOD_NONE -// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { -// q = new ModifierQueryNode(q, mods); -// } -// clauses.add(q); -// } -// ( -// conj=Conjunction() mods=Modifiers() q=Clause(field) -// { -// // do not create modifier nodes with MOD_NONE -// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { -// q = new ModifierQueryNode(q, mods); -// } -// clauses.add(q); -// //TODO: figure out what to do with AND and ORs -// } -// )* -// { -// if (clauses.size() == 1 && firstQuery != null) -// return firstQuery; -// else { -// return new BooleanQueryNode(clauses); -// } -// } -// } - -QueryNode ModClause(CharSequence field) : { - QueryNode q; - ModifierQueryNode.Modifier mods; -} -{ - mods=Modifiers() q= Clause(field) { - if (mods != ModifierQueryNode.Modifier.MOD_NONE) { - q = new ModifierQueryNode(q, mods); - } - return q; - } -} - -QueryNode Clause(CharSequence field) : { - QueryNode q; - Token fieldToken=null, boost=null; - boolean group = false; -} -{ - [ - LOOKAHEAD(2) - ( - fieldToken= {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} - ) - ] - - ( - q=Term(field) - | q=Query(field) ( boost=)? {group=true;} - - ) - { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q = new BoostQueryNode(q, f); - } - } catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ - } - } - if (group) { q = new GroupQueryNode(q);} - return q; - } -} - - -QueryNode Term(CharSequence field) : { - Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean fuzzy = false; - QueryNode q =null; - ParametricQueryNode qLower, qUpper; - float defaultMinSimilarity = 0.5f; -} -{ - ( - ( - term= { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); } - | term= - ) - [ fuzzySlop= { fuzzy=true; } ] - [ boost= [ fuzzySlop= { fuzzy=true; } ] ] - { - if (fuzzy) { - float fms = defaultMinSimilarity; - try { - fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); - } catch (Exception ignored) { } - if(fms < 0.0f || fms > 1.0f){ - throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS)); - } - q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); - } - } - | ( ( goop1=|goop1= ) - [ ] ( goop2=|goop2= ) - ) - [ boost= ] - { - if (goop1.kind == RANGEIN_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } - if (goop2.kind == RANGEIN_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } - - qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GE, - EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); - qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LE, - EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); - q = new ParametricRangeQueryNode(qLower, qUpper); - } - | ( ( goop1=|goop1= ) - [ ] ( goop2=|goop2= ) - ) - [ boost= ] - { - if (goop1.kind == RANGEEX_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } - if (goop2.kind == RANGEEX_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } - qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT, - EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); - qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT, - EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); - q = new ParametricRangeQueryNode(qLower, qUpper); - } - | term= {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);} - [ fuzzySlop= ] - [ boost= ] - { - int phraseSlop = 0; - - if (fuzzySlop != null) { - try { - phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); - q = new SlopQueryNode(q, phraseSlop); - } - catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no PhraseSlop", if - * slop number is invalid) - */ - } - } - - } - ) - { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q = new BoostQueryNode(q, f); - } - } catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ - } - } - return q; - } -}