--- /dev/null
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Surround query language parser */
+
+/* Query language operators: OR, AND, NOT, W, N, (, ), :, ^, *, ?, " and comma */
+
+
+options { /* JavaCC generator options for this grammar */
+ STATIC=false; /* generate instance (non-static) parser members */
+ JAVA_UNICODE_ESCAPE=true; /* NOTE(review): JavaCC documents this option as ignored when USER_CHAR_STREAM is true - confirm */
+ USER_CHAR_STREAM=true; /* the parser reads from a caller-supplied CharStream (FastCharStream below) */
+}
+
+PARSER_BEGIN(QueryParser)
+
+package org.apache.lucene.queryParser.surround.parser;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.io.StringReader;
+
+
+import org.apache.lucene.analysis.TokenStream;
+
+import org.apache.lucene.queryParser.surround.query.SrndQuery;
+import org.apache.lucene.queryParser.surround.query.FieldsQuery;
+import org.apache.lucene.queryParser.surround.query.OrQuery;
+import org.apache.lucene.queryParser.surround.query.AndQuery;
+import org.apache.lucene.queryParser.surround.query.NotQuery;
+import org.apache.lucene.queryParser.surround.query.DistanceQuery;
+import org.apache.lucene.queryParser.surround.query.SrndTermQuery;
+import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery;
+import org.apache.lucene.queryParser.surround.query.SrndTruncQuery;
+
+/**
+ * This class is generated by JavaCC. The only method that clients should need
+ * to call is <a href="#parse">parse()</a>.
+ */
+
+public class QueryParser {
+ final int minimumPrefixLength = 3;
+ final int minimumCharsInTrunc = 3;
+ final String truncationErrorMessage = "Too unrestrictive truncation: ";
+ final String boostErrorMessage = "Cannot handle boost value: ";
+
+ /* CHECKME: These should be the same as for the tokenizer. How? */
+ final char truncator = '*';
+ final char anyChar = '?';
+ final char quote = '\"';
+ final char fieldOperator = ':';
+ final char comma = ','; /* prefix list separator */
+ final char carat = '^'; /* weight operator */
+
+ static public SrndQuery parse(String query) throws ParseException {
+ QueryParser parser = new QueryParser();
+ return parser.parse2(query);
+ }
+
+ public QueryParser() {
+ this(new FastCharStream(new StringReader("")));
+ }
+
+ public SrndQuery parse2(String query) throws ParseException {
+ ReInit(new FastCharStream(new StringReader(query)));
+ try {
+ return TopSrndQuery();
+ } catch (TokenMgrError tme) {
+ throw new ParseException(tme.getMessage());
+ }
+ }
+
+ protected SrndQuery getFieldsQuery(
+ SrndQuery q, ArrayList<String> fieldNames) {
+ /* FIXME: check acceptable subquery: at least one subquery should not be
+ * a fields query.
+ */
+ return new FieldsQuery(q, fieldNames, fieldOperator);
+ }
+
+ protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
+ return new OrQuery(queries, infix, orToken.image);
+ }
+
+ protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
+ return new AndQuery( queries, infix, andToken.image);
+ }
+
+ protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
+ return new NotQuery( queries, notToken.image);
+ }
+
+ protected static int getOpDistance(String distanceOp) {
+ /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
+ return distanceOp.length() == 1
+ ? 1
+ : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1));
+ }
+
+ protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
+ throws ParseException {
+ String m = distq.distanceSubQueryNotAllowed();
+ if (m != null) {
+ throw new ParseException("Operator " + opName + ": " + m);
+ }
+ }
+
+ protected SrndQuery getDistanceQuery(
+ List<SrndQuery> queries,
+ boolean infix,
+ Token dToken,
+ boolean ordered) throws ParseException {
+ DistanceQuery dq = new DistanceQuery(queries,
+ infix,
+ getOpDistance(dToken.image),
+ dToken.image,
+ ordered);
+ checkDistanceSubQueries(dq, dToken.image);
+ return dq;
+ }
+
+ protected SrndQuery getTermQuery(
+ String term, boolean quoted) {
+ return new SrndTermQuery(term, quoted);
+ }
+
+ protected boolean allowedSuffix(String suffixed) {
+ return (suffixed.length() - 1) >= minimumPrefixLength;
+ }
+
+ protected SrndQuery getPrefixQuery(
+ String prefix, boolean quoted) {
+ return new SrndPrefixQuery(prefix, quoted, truncator);
+ }
+
+ protected boolean allowedTruncation(String truncated) {
+ /* At least 3 normal characters needed. */
+ int nrNormalChars = 0;
+ for (int i = 0; i < truncated.length(); i++) {
+ char c = truncated.charAt(i);
+ if ((c != truncator) && (c != anyChar)) {
+ nrNormalChars++;
+ }
+ }
+ return nrNormalChars >= minimumCharsInTrunc;
+ }
+
+ protected SrndQuery getTruncQuery(String truncated) {
+ return new SrndTruncQuery(truncated, truncator, anyChar);
+ }
+}
+
+PARSER_END(QueryParser)
+
+/* ***************** */
+/* Token Definitions */
+/* ***************** */
+
+<*> TOKEN : { /* private (#) helper expressions, declared for all lexical states (<*>) */
+ <#_NUM_CHAR: ["0"-"9"] > /* a single decimal digit */
+| <#_TERM_CHAR: /* everything except whitespace and operators */
+ ( ~[ " ", "\t", "\n", "\r",
+ ",", "?", "*", "(", ")", ":", "^", "\""]
+ ) >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) > /* space, tab, newline or carriage return */
+| <#_STAR: "*" > /* term truncation */
+| <#_ONE_CHAR: "?" > /* precisely one character in a term */
+/* 2..99 prefix for distance operators: 2-9 or 20-99 (first alternative), 10-19 (second) */
+| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))>
+}
+
+<DEFAULT> SKIP : { /* whitespace is discarded between tokens in the DEFAULT state */
+ < <_WHITESPACE>>
+}
+
+/* Operator tokens (in increasing order of precedence).
+ * Each operator is accepted in all upper case or all lower case only.
+ * When several expressions match the same longest input, JavaCC picks the
+ * one declared first, so an operator spelling is never returned as a TERM.
+ */
+<DEFAULT> TOKEN :
+{
+ <OR: "OR" | "or">
+| <AND: "AND" | "and">
+| <NOT: "NOT" | "not">
+| <W: (<_DISTOP_NUM>)? ("W"|"w")> /* optional 2..99 distance, then W or w */
+| <N: (<_DISTOP_NUM>)? ("N"|"n")> /* optional 2..99 distance, then N or n */
+/* These are excluded in _TERM_CHAR: */
+| <LPAREN: "(">
+| <RPAREN: ")">
+| <COMMA: ",">
+| <COLON: ":">
+| <CARAT: "^"> : Boost /* switches the token manager to the Boost state */
+/* Literal non-empty term between double quotes. In QUOTED, a double quote
+ * or a backslash inside the term is written with a preceding backslash.
+ * TRUNCQUOTED is quoted text followed by the truncation star; it accepts
+ * any character except a double quote between the quotes.
+ */
+| <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>>
+| <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
+| <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>> /* term characters followed by one final star */
+| <TRUNCTERM: (<_TERM_CHAR>)+
+ (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
+ (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )*
+ >
+| <TERM: (<_TERM_CHAR>)+> /* plain term: one or more term characters */
+}
+
+<Boost> TOKEN : { /* Boost is the state entered after a CARAT token */
+<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT /* digits with an optional fraction; switches back to DEFAULT */
+}
+
+
+/* Start production: one fields query that must be followed by end of input. */
+SrndQuery TopSrndQuery() : {
+  SrndQuery topQuery;
+}
+{
+  topQuery = FieldsQuery() <EOF>
+  { return topQuery; }
+}
+
+
+/* An OR query, optionally preceded by one or more "field:" prefixes. */
+SrndQuery FieldsQuery() : {
+  SrndQuery body;
+  ArrayList<String> fields;
+}
+{
+  fields = OptionalFields()
+  body = OrQuery()
+  {
+    /* Without field prefixes the query is returned unwrapped. */
+    if (fields == null) {
+      return body;
+    }
+    return getFieldsQuery(body, fields);
+  }
+}
+
+
+/* Collects leading "name:" prefixes; returns null when there are none. */
+ArrayList<String> OptionalFields() : {
+  Token name;
+  ArrayList<String> names = null;
+}
+{
+  (
+    LOOKAHEAD(2) /* a TERM is a field name only when a colon follows it */
+    name = <TERM> <COLON>
+    {
+      /* The list is created lazily, on the first field name. */
+      if (names == null) {
+        names = new ArrayList<String>();
+      }
+      names.add(name.image);
+    }
+  )*
+  { return names; }
+}
+
+
+/* Infix OR over AND queries. A single operand is returned as is. */
+SrndQuery OrQuery() : {
+  SrndQuery operand;
+  ArrayList<SrndQuery> operands = null;
+  Token lastOr = null;
+}
+{
+  operand = AndQuery()
+  (
+    lastOr = <OR> /* only the last operator token seen is kept */
+    {
+      /* First OR: start the operand list with the left hand side. */
+      if (operands == null) {
+        operands = new ArrayList<SrndQuery>();
+        operands.add(operand);
+      }
+    }
+    operand = AndQuery()
+    { operands.add(operand); }
+  )*
+  {
+    if (operands == null) {
+      return operand;
+    }
+    return getOrQuery(operands, true /* infix */, lastOr);
+  }
+}
+
+
+/* Infix AND over NOT queries. A single operand is returned as is. */
+SrndQuery AndQuery() : {
+  SrndQuery operand;
+  ArrayList<SrndQuery> operands = null;
+  Token lastAnd = null;
+}
+{
+  operand = NotQuery()
+  (
+    lastAnd = <AND> /* only the last operator token seen is kept */
+    {
+      /* First AND: start the operand list with the left hand side. */
+      if (operands == null) {
+        operands = new ArrayList<SrndQuery>();
+        operands.add(operand);
+      }
+    }
+    operand = NotQuery()
+    { operands.add(operand); }
+  )*
+  {
+    if (operands == null) {
+      return operand;
+    }
+    return getAndQuery(operands, true /* infix */, lastAnd);
+  }
+}
+
+
+/* Infix NOT over N queries. A single operand is returned as is. */
+SrndQuery NotQuery() : {
+  SrndQuery operand;
+  ArrayList<SrndQuery> operands = null;
+  Token lastNot = null;
+}
+{
+  operand = NQuery()
+  (
+    lastNot = <NOT> /* only the last operator token seen is kept */
+    {
+      /* First NOT: start the operand list with the left hand side. */
+      if (operands == null) {
+        operands = new ArrayList<SrndQuery>();
+        operands.add(operand);
+      }
+    }
+    operand = NQuery()
+    { operands.add(operand); }
+  )*
+  {
+    if (operands == null) {
+      return operand;
+    }
+    return getNotQuery(operands, lastNot);
+  }
+}
+
+
+/* Infix N (unordered distance) over W queries, combined pairwise from the left. */
+SrndQuery NQuery() : {
+  SrndQuery left;
+  SrndQuery right;
+  ArrayList<SrndQuery> pair;
+  Token nToken;
+}
+{
+  left = WQuery()
+  (
+    nToken = <N>
+    right = WQuery()
+    {
+      /* left associative: the result so far becomes the next left operand */
+      pair = new ArrayList<SrndQuery>();
+      pair.add(left);
+      pair.add(right);
+      left = getDistanceQuery(pair, true /* infix */, nToken, false /* not ordered */);
+    }
+  )*
+  { return left; }
+}
+
+
+/* Infix W (ordered distance) over primary queries, combined pairwise from the left. */
+SrndQuery WQuery() : {
+  SrndQuery left;
+  SrndQuery right;
+  ArrayList<SrndQuery> pair;
+  Token wToken;
+}
+{
+  left = PrimaryQuery()
+  (
+    wToken = <W>
+    right = PrimaryQuery()
+    {
+      /* left associative: the result so far becomes the next left operand */
+      pair = new ArrayList<SrndQuery>();
+      pair.add(left);
+      pair.add(right);
+      left = getDistanceQuery(pair, true /* infix */, wToken, true /* ordered */);
+    }
+  )*
+  { return left; }
+}
+
+
+/* Bracketed query, prefix operator query or simple term, each optionally weighted. */
+SrndQuery PrimaryQuery() : {
+  SrndQuery primary;
+}
+{
+  (
+    <LPAREN> primary = FieldsQuery() <RPAREN>
+  | primary = PrefixOperatorQuery()
+  | primary = SimpleTerm()
+  )
+  OptionalWeights(primary) /* applies any ^weight suffixes to the query just parsed */
+  { return primary; }
+}
+
+
+/* Prefix notation: an operator followed by a bracketed, comma separated operand list. */
+SrndQuery PrefixOperatorQuery() : {
+  Token op;
+  List<SrndQuery> operands;
+  SrndQuery result;
+}
+{
+  (
+    /* prefix OR */
+    op = <OR> operands = FieldsQueryList()
+    { result = getOrQuery(operands, false /* not infix */, op); }
+
+  | /* prefix AND */
+    op = <AND> operands = FieldsQueryList()
+    { result = getAndQuery(operands, false /* not infix */, op); }
+
+  | /* prefix N */
+    op = <N> operands = FieldsQueryList()
+    { result = getDistanceQuery(operands, false /* not infix */, op, false /* not ordered */); }
+
+  | /* prefix W */
+    op = <W> operands = FieldsQueryList()
+    { result = getDistanceQuery(operands, false /* not infix */, op, true /* ordered */); }
+  )
+  { return result; }
+}
+
+
+/* Bracketed list of at least two fields queries, separated by commas. */
+List<SrndQuery> FieldsQueryList() : {
+  SrndQuery element;
+  ArrayList<SrndQuery> elements = new ArrayList<SrndQuery>();
+}
+{
+  <LPAREN>
+  element = FieldsQuery()
+  { elements.add(element); }
+  (
+    <COMMA> element = FieldsQuery()
+    { elements.add(element); }
+  )+
+  <RPAREN>
+  { return elements; }
+}
+
+
+/* A single term: plain, quoted, prefix (ending in *), or truncated (with * or ?). */
+SrndQuery SimpleTerm() : {
+  Token tok;
+  String image;
+}
+{
+  (
+    tok = <TERM>
+    { return getTermQuery(tok.image, false /* not quoted */); }
+
+  | tok = <QUOTED>
+    {
+      /* drop the surrounding quotes */
+      image = tok.image;
+      return getTermQuery(image.substring(1, image.length() - 1), true /* quoted */);
+    }
+
+  | tok = <SUFFIXTERM>
+    {
+      /* ending in *; the star is dropped from the prefix */
+      image = tok.image;
+      if (! allowedSuffix(image)) {
+        throw new ParseException(truncationErrorMessage + image);
+      }
+      return getPrefixQuery(image.substring(0, image.length() - 1), false /* not quoted */);
+    }
+
+  | tok = <TRUNCTERM>
+    {
+      /* with at least one * or ? */
+      image = tok.image;
+      if (! allowedTruncation(image)) {
+        throw new ParseException(truncationErrorMessage + image);
+      }
+      return getTruncQuery(image);
+    }
+
+  | tok = <TRUNCQUOTED>
+    {
+      /* eg. "9b-b,m"* : two quotes and the star do not count towards the prefix */
+      image = tok.image;
+      if ((image.length() - 3) < minimumPrefixLength) {
+        throw new ParseException(truncationErrorMessage + image);
+      }
+      return getPrefixQuery(image.substring(1, image.length() - 2), true /* quoted */);
+    }
+  )
+}
+
+
+/* Applies zero or more "^number" weights to q; each one multiplies q's current weight.
+ * Throws ParseException for a weight that is not a positive, finite float.
+ */
+void OptionalWeights(SrndQuery q) : {
+  Token weight=null;
+}
+{
+  (
+    <CARAT> weight=<NUMBER>
+    {
+      float boost;
+      try {
+        boost = Float.parseFloat(weight.image);
+      } catch (NumberFormatException floatExc) {
+        throw new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")");
+      }
+      /* A literal that underflows parses to 0 and one that overflows parses to
+       * infinity without any exception; neither is usable as a weight.
+       */
+      if (boost <= 0.0f || Float.isInfinite(boost)) {
+        throw new ParseException(boostErrorMessage + weight.image);
+      }
+      q.setWeight(boost * q.getWeight()); /* left associative, fwiw */
+    }
+  )*
+}
+