1 package org.apache.lucene.queryParser;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Reader;
22 import org.apache.lucene.analysis.Analyzer;
23 import org.apache.lucene.analysis.LowerCaseFilter;
24 import org.apache.lucene.analysis.TokenFilter;
25 import org.apache.lucene.analysis.TokenStream;
26 import org.apache.lucene.analysis.standard.StandardTokenizer;
27 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
28 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
29 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
30 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
31 import org.apache.lucene.search.Query;
32 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
33 import org.apache.lucene.util.Version;
36 * Test QueryParser's ability to deal with Analyzers that return more
37 * than one token per position or that return tokens with a position
38 * increment greater than 1.
41 public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
// Counter read by TestFilter.incrementToken(): while it is positive the
// filter emits a synthetic "multi" + (multiToken + 1) token with a position
// increment of 0.
// NOTE(review): this is static, so it is shared by every TestFilter
// instance; the statements that assign it are not visible in this excerpt.
43 private static int multiToken = 0;
/**
 * Parses a series of query strings with a QueryParser backed by
 * MultiAnalyzer and compares each parsed query's toString() with the
 * expected text, covering plain terms, quoted phrases, field prefixes,
 * explicit slop, boost and the AND default operator.
 *
 * @throws ParseException if the parser rejects one of the query strings
 */
45 public void testMultiAnalyzer() throws ParseException {
// Empty default field, so unprefixed terms print without a "field:" prefix.
47 QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "", new MultiAnalyzer());
49 // trivial, no multiple tokens:
50 assertEquals("foo", qp.parse("foo").toString());
51 assertEquals("foo", qp.parse("\"foo\"").toString());
52 assertEquals("foo foobar", qp.parse("foo foobar").toString());
53 assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString());
54 assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"").toString());
56 // two tokens at the same position:
57 assertEquals("(multi multi2) foo", qp.parse("multi foo").toString());
58 assertEquals("foo (multi multi2)", qp.parse("foo multi").toString());
59 assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi").toString());
60 assertEquals("+(foo (multi multi2)) +(bar (multi multi2))",
61 qp.parse("+(foo multi) +(bar multi)").toString());
62 assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"",
63 qp.parse("+(foo multi) field:\"bar multi\"").toString());
// the same expansion inside quoted phrases:
66 assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString());
67 assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString());
68 assertEquals("\"foo (multi multi2) foobar (multi multi2)\"",
69 qp.parse("\"foo multi foobar multi\"").toString());
// explicit field prefixes:
72 assertEquals("(field:multi field:multi2) field:foo", qp.parse("field:multi field:foo").toString());
73 assertEquals("field:\"(multi multi2) foo\"", qp.parse("field:\"multi foo\"").toString());
75 // three tokens at one position:
76 assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti").toString());
77 assertEquals("foo (triplemulti multi3 multi2) foobar",
78 qp.parse("foo triplemulti foobar").toString());
80 // phrase with non-default slop:
81 assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10").toString());
83 // phrase with non-default boost:
84 assertEquals("\"(multi multi2) foo\"^2.0", qp.parse("\"multi foo\"^2").toString());
86 // phrase after changing default slop
// NOTE(review): the expected "~99" implies the parser's default phrase slop
// was set to 99 before these assertions; that call is not visible in this
// excerpt - confirm it is present in the real file.
88 assertEquals("\"(multi multi2) foo\"~99 bar",
89 qp.parse("\"multi foo\" bar").toString());
// An explicit "~2" on a phrase is expected to win over the default slop.
90 assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2",
91 qp.parse("\"multi foo\" \"foo bar\"~2").toString());
94 // non-default operator:
95 qp.setDefaultOperator(QueryParser.AND_OPERATOR);
96 assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString());
/**
 * Repeats the modified-default-slop checks with DumbQueryParser, a
 * QueryParser subclass whose getFieldQuery override wraps the query
 * returned by the superclass.
 *
 * @throws ParseException if the parser rejects one of the query strings
 */
100 public void testMultiAnalyzerWithSubclassOfQueryParser() throws ParseException {
102 DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer());
103 qp.setPhraseSlop(99); // modified default slop
105 // direct call to (super's) getFieldQuery to demonstrate the difference
106 // between phrase and multiphrase with modified default slop
107 assertEquals("\"foo bar\"~99",
108 qp.getSuperFieldQuery("","foo bar", true).toString());
109 assertEquals("\"(multi multi2) bar\"~99",
110 qp.getSuperFieldQuery("","multi bar", true).toString());
113 // ask subclass to parse phrase with modified default slop
114 assertEquals("\"(multi multi2) foo\"~99 bar",
115 qp.parse("\"multi foo\" bar").toString());
/**
 * Parses "the quick brown [fox]" queries, bare and quoted, through
 * PosIncrementAnalyzer and checks that "the" is absent from the parsed
 * query's toString().
 *
 * @throws ParseException if the parser rejects one of the query strings
 */
119 public void testPosIncrementAnalyzer() throws ParseException {
// NOTE(review): this parser is pinned to Version.LUCENE_24 while the other
// tests use TEST_VERSION_CURRENT - presumably to get that version's
// position-increment handling; confirm before changing.
120 QueryParser qp = new QueryParser(Version.LUCENE_24, "", new PosIncrementAnalyzer());
121 assertEquals("quick brown", qp.parse("the quick brown").toString());
122 assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString());
123 assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());
124 assertEquals("\"quick brown fox\"", qp.parse("\"the quick brown fox\"").toString());
128 * Expands "multi" to "multi" and "multi2", both at the same position,
129 * and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
131 private class MultiAnalyzer extends Analyzer {
133 public MultiAnalyzer() {
// Builds the chain StandardTokenizer -> TestFilter -> LowerCaseFilter over
// the reader. TestFilter runs before lower-casing, so its term comparisons
// see the tokenizer's output in its original case. fieldName is not used in
// the visible lines.
137 public TokenStream tokenStream(String fieldName, Reader reader) {
138 TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
139 result = new TestFilter(result);
140 result = new LowerCaseFilter(TEST_VERSION_CURRENT, result);
/**
 * Token filter used by MultiAnalyzer. While the enclosing class's
 * multiToken counter is positive it emits extra "multi" + (multiToken + 1)
 * tokens that reuse the previous input token's offsets and type and carry
 * a position increment of 0.
 */
145 private final class TestFilter extends TokenFilter {
// Type and offsets captured from the most recent input token; copied onto
// the injected tokens.
147 private String prevType;
148 private int prevStartOffset;
149 private int prevEndOffset;
// Attribute handles, registered in the constructor.
151 CharTermAttribute termAtt;
152 PositionIncrementAttribute posIncrAtt;
153 OffsetAttribute offsetAtt;
154 TypeAttribute typeAtt;
// Registers the four attributes this filter reads and writes.
156 public TestFilter(TokenStream in) {
158 termAtt = addAttribute(CharTermAttribute.class);
159 posIncrAtt = addAttribute(PositionIncrementAttribute.class);
160 offsetAtt = addAttribute(OffsetAttribute.class);
161 typeAtt = addAttribute(TypeAttribute.class);
// Produces either an injected "multi<N>" token or the next input token.
165 public final boolean incrementToken() throws java.io.IOException {
// Injection pending: the term becomes "multi" + (multiToken + 1), so a
// counter of 2 yields "multi3" and a counter of 1 yields "multi2".
166 if (multiToken > 0) {
167 termAtt.setEmpty().append("multi"+(multiToken+1));
168 offsetAtt.setOffset(prevStartOffset, prevEndOffset);
169 typeAtt.setType(prevType);
// Increment 0 stacks the injected token on the previous token's position.
170 posIncrAtt.setPositionIncrement(0);
// NOTE(review): the statements that update multiToken and return from this
// branch are not visible in this excerpt.
174 boolean next = input.incrementToken();
// Remember this token's type and offsets for any tokens injected after it.
178 prevType = typeAtt.type();
179 prevStartOffset = offsetAtt.startOffset();
180 prevEndOffset = offsetAtt.endOffset();
181 String text = termAtt.toString();
// Trigger words. NOTE(review): the branch bodies are not visible here;
// presumably they set multiToken so that "triplemulti" is followed by two
// injected tokens and "multi" by one - confirm against the full file.
182 if (text.equals("triplemulti")) {
185 } else if (text.equals("multi")) {
196 * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
197 * Does not work correctly for input other than "the quick brown ...".
199 private class PosIncrementAnalyzer extends Analyzer {
201 public PosIncrementAnalyzer() {
// Builds the chain StandardTokenizer -> TestPosIncrementFilter ->
// LowerCaseFilter over the reader. The position-increment filter runs before
// lower-casing, so its "the"/"quick" comparisons see the original case.
// fieldName is not used in the visible lines.
205 public TokenStream tokenStream(String fieldName, Reader reader) {
206 TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
207 result = new TestPosIncrementFilter(result);
208 result = new LowerCaseFilter(TEST_VERSION_CURRENT, result);
/**
 * Token filter used by PosIncrementAnalyzer. It treats "the" as a stopword
 * and sets the position increment to 2 for "quick" and to 1 for any other
 * term.
 */
213 private final class TestPosIncrementFilter extends TokenFilter {
// Attribute handles, registered in the constructor.
215 CharTermAttribute termAtt;
216 PositionIncrementAttribute posIncrAtt;
// Registers the term and position-increment attributes this filter uses.
218 public TestPosIncrementFilter(TokenStream in) {
220 termAtt = addAttribute(CharTermAttribute.class);
221 posIncrAtt = addAttribute(PositionIncrementAttribute.class);
// Pulls tokens from the input in a loop so that "the" can be passed over.
// NOTE(review): the return statements are not visible in this excerpt.
225 public final boolean incrementToken () throws java.io.IOException {
226 while(input.incrementToken()) {
227 if (termAtt.toString().equals("the")) {
228 // stopword, do nothing
229 } else if (termAtt.toString().equals("quick")) {
// The increment is hard-coded to 2, which matches only input where exactly
// one "the" precedes "quick".
230 posIncrAtt.setPositionIncrement(2);
// Any other term gets the ordinary increment of 1.
233 posIncrAtt.setPositionIncrement(1);
241 /** a very simple subclass of QueryParser */
242 private final static class DumbQueryParser extends QueryParser {
// Always builds the parser with TEST_VERSION_CURRENT; f is the default field
// and a the analyzer handed to QueryParser.
244 public DumbQueryParser(String f, Analyzer a) {
245 super(TEST_VERSION_CURRENT, f, a);
248 /** Exposes QueryParser's own getFieldQuery so tests can call it without the wrapping override. */
249 public Query getSuperFieldQuery(String f, String t, boolean quoted)
250 throws ParseException {
251 return super.getFieldQuery(f,t,quoted);
253 /** Wraps the superclass result in a DumbQueryWrapper. */
255 protected Query getFieldQuery(String f, String t, boolean quoted)
256 throws ParseException {
257 return new DumbQueryWrapper(getSuperFieldQuery(f,t,quoted));
262 * A very simple wrapper to prevent instanceof checks but uses
263 * the toString of the query it wraps.
265 private final static class DumbQueryWrapper extends Query {
// NOTE(review): the declaration of field q and the constructor body are not
// visible in this excerpt; presumably the constructor stores its argument in
// q - confirm.
268 public DumbQueryWrapper(Query q) {
// Delegates the string form to the wrapped query.
273 public String toString(String f) {
274 return q.toString(f);