2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 /* Surround query language parser */
20 /* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */
25 JAVA_UNICODE_ESCAPE=true;
26 USER_CHAR_STREAM=true;
29 PARSER_BEGIN(QueryParser)
31 package org.apache.lucene.queryParser.surround.parser;
33 import java.util.ArrayList;
34 import java.util.List;
35 import java.io.StringReader;
38 import org.apache.lucene.analysis.TokenStream;
40 import org.apache.lucene.queryParser.surround.query.SrndQuery;
41 import org.apache.lucene.queryParser.surround.query.FieldsQuery;
42 import org.apache.lucene.queryParser.surround.query.OrQuery;
43 import org.apache.lucene.queryParser.surround.query.AndQuery;
44 import org.apache.lucene.queryParser.surround.query.NotQuery;
45 import org.apache.lucene.queryParser.surround.query.DistanceQuery;
46 import org.apache.lucene.queryParser.surround.query.SrndTermQuery;
47 import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery;
48 import org.apache.lucene.queryParser.surround.query.SrndTruncQuery;
51 * This class is generated by JavaCC. The only method that clients should need
52 * to call is <a href="#parse">parse()</a>.
55 public class QueryParser {
56 final int minimumPrefixLength = 3;
57 final int minimumCharsInTrunc = 3;
58 final String truncationErrorMessage = "Too unrestrictive truncation: ";
59 final String boostErrorMessage = "Cannot handle boost value: ";
61 /* CHECKME: These should be the same as for the tokenizer. How? */
62 final char truncator = '*';
63 final char anyChar = '?';
64 final char quote = '\"';
65 final char fieldOperator = ':';
66 final char comma = ','; /* prefix list separator */
67 final char carat = '^'; /* weight operator */
69 static public SrndQuery parse(String query) throws ParseException {
70 QueryParser parser = new QueryParser();
71 return parser.parse2(query);
74 public QueryParser() {
75 this(new FastCharStream(new StringReader("")));
78 public SrndQuery parse2(String query) throws ParseException {
79 ReInit(new FastCharStream(new StringReader(query)));
81 return TopSrndQuery();
82 } catch (TokenMgrError tme) {
83 throw new ParseException(tme.getMessage());
87 protected SrndQuery getFieldsQuery(
88 SrndQuery q, ArrayList<String> fieldNames) {
89 /* FIXME: check acceptable subquery: at least one subquery should not be
92 return new FieldsQuery(q, fieldNames, fieldOperator);
95 protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
96 return new OrQuery(queries, infix, orToken.image);
99 protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
100 return new AndQuery( queries, infix, andToken.image);
103 protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
104 return new NotQuery( queries, notToken.image);
107 protected static int getOpDistance(String distanceOp) {
108 /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
109 return distanceOp.length() == 1
111 : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1));
114 protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
115 throws ParseException {
116 String m = distq.distanceSubQueryNotAllowed();
118 throw new ParseException("Operator " + opName + ": " + m);
122 protected SrndQuery getDistanceQuery(
123 List<SrndQuery> queries,
126 boolean ordered) throws ParseException {
127 DistanceQuery dq = new DistanceQuery(queries,
129 getOpDistance(dToken.image),
132 checkDistanceSubQueries(dq, dToken.image);
136 protected SrndQuery getTermQuery(
137 String term, boolean quoted) {
138 return new SrndTermQuery(term, quoted);
141 protected boolean allowedSuffix(String suffixed) {
142 return (suffixed.length() - 1) >= minimumPrefixLength;
145 protected SrndQuery getPrefixQuery(
146 String prefix, boolean quoted) {
147 return new SrndPrefixQuery(prefix, quoted, truncator);
150 protected boolean allowedTruncation(String truncated) {
151 /* At least 3 normal characters needed. */
152 int nrNormalChars = 0;
153 for (int i = 0; i < truncated.length(); i++) {
154 char c = truncated.charAt(i);
155 if ((c != truncator) && (c != anyChar)) {
159 return nrNormalChars >= minimumCharsInTrunc;
162 protected SrndQuery getTruncQuery(String truncated) {
163 return new SrndTruncQuery(truncated, truncator, anyChar);
167 PARSER_END(QueryParser)
169 /* ***************** */
170 /* Token Definitions */
171 /* ***************** */
174 <#_NUM_CHAR: ["0"-"9"] >
175 | <#_TERM_CHAR: /* everything except whitespace and operators */
176 ( ~[ " ", "\t", "\n", "\r",
177 ",", "?", "*", "(", ")", ":", "^", "\""]
179 | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
180 | <#_STAR: "*" > /* term truncation */
181 | <#_ONE_CHAR: "?" > /* precisely one character in a term */
182 /* 2..99 prefix for distance operators */
183 | <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))>
190 /* Operator tokens (in increasing order of precedence): */
194 | <AND: "AND" | "and">
195 | <NOT: "NOT" | "not">
196 | <W: (<_DISTOP_NUM>)? ("W"|"w")>
197 | <N: (<_DISTOP_NUM>)? ("N"|"n")>
198 /* These are excluded in _TERM_CHAR: */
203 | <CARAT: "^"> : Boost
/* Literal non-empty term between double quotes;
 * escape a quoted quote or backslash with a backslash. */
208 | <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>>
209 | <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
210 | <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>>
211 | <TRUNCTERM: (<_TERM_CHAR>)+
212 (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
213 (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )*
215 | <TERM: (<_TERM_CHAR>)+>
219 <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT
/* Start production: one fields query that must be followed by end of input. */
SrndQuery TopSrndQuery() : {
  SrndQuery topQuery;
}{
  topQuery = FieldsQuery()
  <EOF>
  {return topQuery;}
}
/* Optional field-name prefixes followed by an OR-level query. The query is
 * wrapped by getFieldsQuery only when at least one field name was given. */
SrndQuery FieldsQuery() : {
  ArrayList<String> fieldNames;
  SrndQuery subQuery;
}{
  fieldNames = OptionalFields()
  subQuery = OrQuery()
  {
    if (fieldNames != null) {
      return getFieldsQuery(subQuery, fieldNames);
    }
    return subQuery;
  }
}
/* Collects zero or more "name:" prefixes. Returns null when there is none;
 * the list is only created once the first field name has been seen. */
ArrayList<String> OptionalFields() : {
  Token nameToken;
  ArrayList<String> names = null;
}{
  ( LOOKAHEAD(2) // to the colon
    nameToken = <TERM>
    <COLON> {
      if (names == null) {
        names = new ArrayList<String>();
      }
      names.add(nameToken.image);
    }
  )*
  {return names;}
}
/* Infix OR over AND-level operands. A single operand is returned unchanged;
 * otherwise all operands are combined through getOrQuery. */
SrndQuery OrQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastOr = null;
}{
  operand = AndQuery()
  ( lastOr = <OR> { /* keep only last used operator */
      if (operands == null) {
        /* first operator seen: start the list with the left operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = AndQuery() {
      operands.add(operand);
    }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getOrQuery(operands, true /* infix */, lastOr);
  }
}
/* Infix AND over NOT-level operands. A single operand is returned unchanged;
 * otherwise all operands are combined through getAndQuery. */
SrndQuery AndQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastAnd = null;
}{
  operand = NotQuery()
  ( lastAnd = <AND> { /* keep only last used operator */
      if (operands == null) {
        /* first operator seen: start the list with the left operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = NotQuery() {
      operands.add(operand);
    }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getAndQuery(operands, true /* infix */, lastAnd);
  }
}
/* Infix NOT over N-level operands. A single operand is returned unchanged;
 * otherwise all operands are combined through getNotQuery. */
SrndQuery NotQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastNot = null;
}{
  operand = NQuery()
  ( lastNot = <NOT> { /* keep only last used operator */
      if (operands == null) {
        /* first operator seen: start the list with the left operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = NQuery() {
      operands.add(operand);
    }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getNotQuery(operands, lastNot);
  }
}
/* Infix N (unordered distance) over W-level operands. Each operator builds a
 * two-element distance query whose left side is the result so far. */
SrndQuery NQuery() : {
  SrndQuery result;
  ArrayList<SrndQuery> pair;
  Token nToken;
}{
  result = WQuery()
  ( nToken = <N> {
      pair = new ArrayList<SrndQuery>();
      pair.add(result); /* left associative */
    }
    result = WQuery() {
      pair.add(result);
      result = getDistanceQuery(pair, true /* infix */, nToken, false /* not ordered */);
    }
  )*
  {return result;}
}
/* Infix W (ordered distance) over primary queries. Each operator builds a
 * two-element distance query whose left side is the result so far. */
SrndQuery WQuery() : {
  SrndQuery result;
  ArrayList<SrndQuery> pair;
  Token wToken;
}{
  result = PrimaryQuery()
  ( wToken = <W> {
      pair = new ArrayList<SrndQuery>();
      pair.add(result); /* left associative */
    }
    result = PrimaryQuery() {
      pair.add(result);
      result = getDistanceQuery(pair, true /* infix */, wToken, true /* ordered */);
    }
  )*
  {return result;}
}
/* Bracketed weighted query or weighted term: a parenthesized fields query,
 * a prefix-operator query or a simple term, followed by optional weights. */
SrndQuery PrimaryQuery() : {
  SrndQuery primary;
}{
  ( <LPAREN> primary = FieldsQuery() <RPAREN>
  | primary = PrefixOperatorQuery()
  | primary = SimpleTerm()
  )
  OptionalWeights(primary)
  {return primary;}
}
/* Prefix notation: an operator (OR, AND, N or W) followed by a parenthesized,
 * comma-separated list of fields queries. */
SrndQuery PrefixOperatorQuery() : {
  Token opToken;
  List<SrndQuery> operands;
}{
  ( opToken = <OR> /* prefix OR */
    operands = FieldsQueryList()
    {return getOrQuery(operands, false /* not infix */, opToken);}

  | opToken = <AND> /* prefix AND */
    operands = FieldsQueryList()
    {return getAndQuery(operands, false /* not infix */, opToken);}

  | opToken = <N> /* prefix N */
    operands = FieldsQueryList()
    {return getDistanceQuery(operands, false /* not infix */, opToken, false /* not ordered */);}

  | opToken = <W> /* prefix W */
    operands = FieldsQueryList()
    {return getDistanceQuery(operands, false /* not infix */, opToken, true /* ordered */);}
  )
}
/* Parenthesized list of at least two fields queries separated by commas. */
List<SrndQuery> FieldsQueryList() : {
  SrndQuery element;
  ArrayList<SrndQuery> elements = new ArrayList<SrndQuery>();
}{
  <LPAREN>
  element = FieldsQuery() {elements.add(element);}
  ( <COMMA>
    element = FieldsQuery() {elements.add(element);}
  )+
  <RPAREN>
  {return elements;}
}
/* A single term: plain, quoted, suffix-truncated, wildcard-truncated, or
 * quoted with a trailing truncator. Truncations that are too short are
 * rejected with a ParseException. */
SrndQuery SimpleTerm() : {
  Token term;
  String image;
}{
  ( term=<TERM>
    {return getTermQuery(term.image, false /* not quoted */);}

  | term=<QUOTED> {
      image = term.image;
      /* drop the surrounding quotes */
      return getTermQuery(image.substring(1, image.length()-1), true /* quoted */);
    }

  | term=<SUFFIXTERM> { /* ending in * */
      image = term.image;
      if (! allowedSuffix(image)) {
        throw new ParseException(truncationErrorMessage + image);
      }
      /* drop the trailing truncator */
      return getPrefixQuery(image.substring(0, image.length()-1), false /* not quoted */);
    }

  | term=<TRUNCTERM> { /* with at least one * or ? */
      image = term.image;
      if (! allowedTruncation(image)) {
        throw new ParseException(truncationErrorMessage + image);
      }
      return getTruncQuery(image);
    }

  | term=<TRUNCQUOTED> { /* eg. "9b-b,m"* */
      image = term.image;
      /* two quotes and the truncator do not count towards the prefix length */
      if ((image.length() - 3) < minimumPrefixLength) {
        throw new ParseException(truncationErrorMessage + image);
      }
      return getPrefixQuery(image.substring(1, image.length()-2), true /* quoted */);
    }
  )
}
/* Applies zero or more "^number" boosts to q. Each boost multiplies the
 * current weight of q. A boost that cannot be parsed, is not positive, or
 * overflows the float range is rejected with a ParseException. */
void OptionalWeights(SrndQuery q) : {
  Token weight=null;
}{
  ( <CARAT> weight=<NUMBER> {
      float f;
      try {
        f = Float.parseFloat(weight.image);
      } catch (NumberFormatException floatExc) {
        ParseException boostExc =
            new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")");
        boostExc.initCause(floatExc);
        throw boostExc;
      }
      /* A very long digit string parses to Infinity, which would pass a plain
       * "<= 0" test and make the weight infinite, so reject it as well. */
      if ((f <= 0.0f) || Float.isInfinite(f)) {
        throw new ParseException(boostErrorMessage + weight.image);
      }
      q.setWeight(f * q.getWeight()); /* left associative, fwiw */
    }
  )*
}