X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj diff --git a/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj b/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj new file mode 100644 index 0000000..a29964a --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj @@ -0,0 +1,454 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Surround query language parser */ + +/* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */ + + /* Generator options: STATIC=false produces an instance based parser; USER_CHAR_STREAM=true makes the generated parser read from a caller supplied character stream, here the FastCharStream created in the constructor and in parse2(). */ +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(QueryParser) + +package org.apache.lucene.queryParser.surround.parser; + +import java.util.ArrayList; +import java.util.List; +import java.io.StringReader; + + +import org.apache.lucene.analysis.TokenStream; + +import org.apache.lucene.queryParser.surround.query.SrndQuery; +import org.apache.lucene.queryParser.surround.query.FieldsQuery; +import org.apache.lucene.queryParser.surround.query.OrQuery; +import org.apache.lucene.queryParser.surround.query.AndQuery; +import org.apache.lucene.queryParser.surround.query.NotQuery; +import org.apache.lucene.queryParser.surround.query.DistanceQuery; +import org.apache.lucene.queryParser.surround.query.SrndTermQuery; +import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery; +import org.apache.lucene.queryParser.surround.query.SrndTruncQuery; + +/** + * This class is generated by JavaCC. The only method that clients should need + * to call is parse(). + */ + +public class QueryParser { + /* Minimum number of characters required in front of the trailing truncator of a prefix term; checked by allowedSuffix() and by the quoted prefix alternative of SimpleTerm(). */ final int minimumPrefixLength = 3; + /* Minimum number of characters other than truncator and anyChar in a truncated term; checked by allowedTruncation(). */ final int minimumCharsInTrunc = 3; + /* Message prefixes of the ParseExceptions thrown by SimpleTerm() and OptionalWeights(); the offending token image is appended. */ final String truncationErrorMessage = "Too unrestrictive truncation: "; + final String boostErrorMessage = "Cannot handle boost value: "; + + /* CHECKME: These should be the same as for the tokenizer. How? 
*/ + final char truncator = '*'; + final char anyChar = '?'; + final char quote = '\"'; + final char fieldOperator = ':'; + final char comma = ','; /* prefix list separator */ + final char carat = '^'; /* weight operator */ + + /** Parses the query text with a newly created parser instance and returns the result of parse2(). */ static public SrndQuery parse(String query) throws ParseException { + QueryParser parser = new QueryParser(); + return parser.parse2(query); + } + + /** Creates a parser on an empty input; parse2() re-initializes it with the text that is actually parsed. */ public QueryParser() { + this(new FastCharStream(new StringReader(""))); + } + + /** Re-initializes this parser on the given text and returns the result of the TopSrndQuery production. A TokenMgrError is rethrown as a ParseException that carries only the message of the error, not the error itself as a cause. */ public SrndQuery parse2(String query) throws ParseException { + ReInit(new FastCharStream(new StringReader(query))); + try { + return TopSrndQuery(); + } catch (TokenMgrError tme) { + throw new ParseException(tme.getMessage()); + } + } + + /** Wraps q in a FieldsQuery over fieldNames, passing fieldOperator as the operator character. Called by the FieldsQuery production only when at least one field name was present. */ protected SrndQuery getFieldsQuery( + SrndQuery q, ArrayList fieldNames) { + /* FIXME: check acceptable subquery: at least one subquery should not be + * a fields query. + */ + return new FieldsQuery(q, fieldNames, fieldOperator); + } + + /** Creates the OrQuery for the given operands; infix tells whether the operator was written in infix or in prefix notation, and the image of orToken is passed on. */ protected SrndQuery getOrQuery(List queries, boolean infix, Token orToken) { + return new OrQuery(queries, infix, orToken.image); + } + + /** Creates the AndQuery for the given operands; same parameters as getOrQuery(). */ protected SrndQuery getAndQuery(List queries, boolean infix, Token andToken) { + return new AndQuery( queries, infix, andToken.image); + } + + /** Creates the NotQuery for the given operands, passing on the image of notToken. There is no infix flag: the grammar only calls this from the infix NotQuery production. */ protected SrndQuery getNotQuery(List queries, Token notToken) { + return new NotQuery( queries, notToken.image); + } + + /** Returns the distance encoded in a W or N operator image: 1 for a one character image, otherwise the integer value of everything in front of the last character. */ protected static int getOpDistance(String distanceOp) { + /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */ + return distanceOp.length() == 1 + ? 
1 + : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1)); + } + + /** Throws a ParseException that names opName when distq.distanceSubQueryNotAllowed() returns a non-null message; returns normally otherwise. */ protected static void checkDistanceSubQueries(DistanceQuery distq, String opName) + throws ParseException { + String m = distq.distanceSubQueryNotAllowed(); + if (m != null) { + throw new ParseException("Operator " + opName + ": " + m); + } + } + + /** Creates a DistanceQuery whose distance is derived from the image of dToken by getOpDistance(), then validates its subqueries with checkDistanceSubQueries() before returning it. */ protected SrndQuery getDistanceQuery( + List queries, + boolean infix, + Token dToken, + boolean ordered) throws ParseException { + DistanceQuery dq = new DistanceQuery(queries, + infix, + getOpDistance(dToken.image), + dToken.image, + ordered); + checkDistanceSubQueries(dq, dToken.image); + return dq; + } + + /** Creates the SrndTermQuery for a term; SimpleTerm() passes quoted terms without their surrounding quotes. */ protected SrndQuery getTermQuery( + String term, boolean quoted) { + return new SrndTermQuery(term, quoted); + } + + /** Returns true when suffixed, not counting its last character, is at least minimumPrefixLength long. SimpleTerm() passes the complete image of a term ending in the truncator. */ protected boolean allowedSuffix(String suffixed) { + return (suffixed.length() - 1) >= minimumPrefixLength; + } + + /** Creates the SrndPrefixQuery; SimpleTerm() passes the prefix without the trailing truncator and, for quoted prefixes, without the quotes. */ protected SrndQuery getPrefixQuery( + String prefix, boolean quoted) { + return new SrndPrefixQuery(prefix, quoted, truncator); + } + + /** Returns true when truncated contains at least minimumCharsInTrunc characters that are neither truncator nor anyChar. */ protected boolean allowedTruncation(String truncated) { + /* At least 3 normal characters needed. */ + int nrNormalChars = 0; + for (int i = 0; i < truncated.length(); i++) { + char c = truncated.charAt(i); + if ((c != truncator) && (c != anyChar)) { + nrNormalChars++; + } + } + return nrNormalChars >= minimumCharsInTrunc; + } + + /** Creates the SrndTruncQuery for the complete image of a truncated term, telling it which characters act as truncator and as anyChar. */ protected SrndQuery getTruncQuery(String truncated) { + return new SrndTruncQuery(truncated, truncator, anyChar); + } +} + +PARSER_END(QueryParser) + +/* ***************** */ +/* Token Definitions */ +/* ***************** */ + +<*> TOKEN : { + <#_NUM_CHAR: ["0"-"9"] > +| <#_TERM_CHAR: /* everything except whitespace and operators */ + ( ~[ " ", "\t", "\n", "\r", + ",", "?", "*", "(", ")", ":", "^", "\""] + ) > +| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) > +| <#_STAR: "*" > /* term truncation */ +| <#_ONE_CHAR: "?" > /* precisely one character in a term */ +/* 2..99 prefix for distance operators */ +| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) 
| ("1" ["0"-"9"]))> +} + + SKIP : { + < <_WHITESPACE>> +} + +/* Operator tokens (in increasing order of precedence): */ + /* NOTE(review): in this copy the token names and parts of the regular expressions below appear to be missing, and so do the token references inside the productions further down (for example an assignment to oprt that is followed directly by an action block). Presumably they were lost when the text was extracted; restore them from the original grammar before running JavaCC. Nothing is reconstructed here. */ TOKEN : +{ + +| +| +| )? ("W"|"w")> +| )? ("N"|"n")> +/* These are excluded in _TERM_CHAR: */ +| +| +| +| +| : Boost +/* Literal non empty term between single quotes, + * escape quoted quote or backslash by backslash. + * Evt. truncated. + */ +| > +| +| )+ <_STAR>> +| )+ + (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */ + (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )* + > +| )+> +} + + TOKEN : { +)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT +} + + /* Start production called by parse2(); returns the query built by FieldsQuery(). */ +SrndQuery TopSrndQuery() : { + SrndQuery q; +}{ + q = FieldsQuery() + + {return q;} +} + + /* An OrQuery optionally preceded by field names. Without field names the OrQuery result is returned unchanged, otherwise it is wrapped by getFieldsQuery(). */ +SrndQuery FieldsQuery() : { + SrndQuery q; + ArrayList fieldNames; +}{ + fieldNames = OptionalFields() + q = OrQuery() + {return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);} +} + + /* Collects the images of zero or more field name tokens; returns null, not an empty list, when there are none. */ +ArrayList OptionalFields() : { + Token fieldName; + ArrayList fieldNames = null; +}{ + ( LOOKAHEAD(2) // to the colon + fieldName = + { + if (fieldNames == null) { + fieldNames = new ArrayList(); + } + fieldNames.add(fieldName.image); + } + )* + {return fieldNames;} +} + + /* Infix OR over one or more AndQuery operands. A single operand is returned as is; otherwise all operands are collected in one list and combined by getOrQuery(). */ +SrndQuery OrQuery() : { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; +}{ + q = AndQuery() + ( oprt = { /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + } + q = AndQuery() { + queries.add(q); + } + )* + {return (queries == null) ? q : getOrQuery(queries, true /* infix */, oprt);} +} + + /* Infix AND over one or more NotQuery operands; same structure as OrQuery(), combined by getAndQuery(). */ +SrndQuery AndQuery() : { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; +}{ + q = NotQuery() + ( oprt = { /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + } + q = NotQuery() { + queries.add(q); + } + )* + {return (queries == null) ? 
q : getAndQuery(queries, true /* infix */, oprt);} +} + + /* Infix NOT over one or more NQuery operands; same structure as OrQuery(), combined by getNotQuery(). */ +SrndQuery NotQuery() : { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; +}{ + q = NQuery() + ( oprt = { /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + } + q = NQuery() { + queries.add(q); + } + )* + {return (queries == null) ? q : getNotQuery(queries, oprt);} +} + + /* Infix unordered distance operator over WQuery operands. Unlike OrQuery(), every operator occurrence builds a new two element list from the query built so far and the next operand, so a chain nests to the left. */ +SrndQuery NQuery() : { + SrndQuery q; + ArrayList queries; + Token dt; +}{ + q = WQuery() + ( dt = { + queries = new ArrayList(); + queries.add(q); /* left associative */ + } + q = WQuery() { + queries.add(q); + q = getDistanceQuery(queries, true /* infix */, dt, false /* not ordered */); + } + )* + {return q;} +} + + /* Infix ordered distance operator over PrimaryQuery operands; same left nesting structure as NQuery(). */ +SrndQuery WQuery() : { + SrndQuery q; + ArrayList queries; + Token wt; +}{ + q = PrimaryQuery() + ( wt = { + queries = new ArrayList(); + queries.add(q); /* left associative */ + } + q = PrimaryQuery() { + queries.add(q); + q = getDistanceQuery(queries, true /* infix */, wt, true /* ordered */); + } + )* + {return q;} +} + + +SrndQuery PrimaryQuery() : { /* bracketed weighted query or weighted term */ + SrndQuery q; +}{ + ( q = FieldsQuery() + | q = PrefixOperatorQuery() + | q = SimpleTerm() + ) + OptionalWeights(q) + {return q;} +} + + /* Prefix notation: an operator token followed by a FieldsQueryList holding its operands. The same factory methods as for infix notation are used, with the infix flag set to false. */ +SrndQuery PrefixOperatorQuery() : { + Token oprt; + List queries; +}{ + ( oprt = /* prefix OR */ + queries = FieldsQueryList() + {return getOrQuery(queries, false /* not infix */, oprt);} + + | oprt = /* prefix AND */ + queries = FieldsQueryList() + {return getAndQuery(queries, false /* not infix */, oprt);} + + | oprt = /* prefix N */ + queries = FieldsQueryList() + {return getDistanceQuery(queries, false /* not infix */, oprt, false /* not ordered */);} + + | oprt = /* prefix W */ + queries = FieldsQueryList() + {return getDistanceQuery(queries, false /* not infix */, oprt, true /* ordered */);} + ) +} + + /* Returns the results of two or more FieldsQuery() operands in the order in which they were parsed. */ +List FieldsQueryList() : { + SrndQuery q; + ArrayList queries = new ArrayList(); +}{ + + q = FieldsQuery() 
{queries.add(q);} + ( q = FieldsQuery() {queries.add(q);})+ + + {return queries;} +} + + /* A single term in one of five forms, judging by the actions below: plain, quoted, ending in the truncator, containing truncation characters, or quoted with a trailing truncator. Truncations that are too short are rejected with a ParseException. */ +SrndQuery SimpleTerm() : { + Token term; +}{ + ( term= + {return getTermQuery(term.image, false /* not quoted */);} + + | term= + {return getTermQuery(term.image.substring(1, term.image.length()-1), true /* quoted */);} + + | term= { /* ending in * */ + if (! allowedSuffix(term.image)) { + throw new ParseException(truncationErrorMessage + term.image); + } + return getPrefixQuery(term.image.substring(0, term.image.length()-1), false /* not quoted */); + } + + | term= { /* with at least one * or ? */ + if (! allowedTruncation(term.image)) { + throw new ParseException(truncationErrorMessage + term.image); + } + return getTruncQuery(term.image); + } + + | term= { /* eg. "9b-b,m"* */ + /* 3 = the two quotes plus the trailing truncator, which the substring below removes */ if ((term.image.length() - 3) < minimumPrefixLength) { + throw new ParseException(truncationErrorMessage + term.image); + } + return getPrefixQuery(term.image.substring(1, term.image.length()-2), true /* quoted */); + } + ) +} + + /* Applies zero or more weights to q; each one multiplies the current weight of q. A value that cannot be converted to a float, or that is not greater than zero, is rejected with a ParseException. */ +void OptionalWeights(SrndQuery q) : { + Token weight=null; +}{ + ( weight= { + float f; + try { + f = Float.valueOf(weight.image).floatValue(); + } catch (Exception floatExc) { + throw new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")"); + } + if (f <= 0.0) { + throw new ParseException(boostErrorMessage + weight.image); + } + q.setWeight(f * q.getWeight()); /* left associative, fwiw */ + } + )* +} +