1 package org.apache.lucene.analysis.payloads;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 import org.apache.lucene.analysis.MockTokenizer;
20 import org.apache.lucene.analysis.TokenStream;
21 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
22 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
23 import org.apache.lucene.index.Payload;
24 import org.apache.lucene.util.LuceneTestCase;
26 import java.io.StringReader;
28 public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
30 public void testPayloads() throws Exception {
31 String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
32 DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
33 (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
34 DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
35 CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
36 PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
37 assertTermEquals("The", filter, termAtt, payAtt, null);
38 assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
39 assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
40 assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
41 assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
42 assertTermEquals("over", filter, termAtt, payAtt, null);
43 assertTermEquals("the", filter, termAtt, payAtt, null);
44 assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
45 assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
46 assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
47 assertFalse(filter.incrementToken());
50 public void testNext() throws Exception {
52 String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
53 DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
54 (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
55 DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
56 assertTermEquals("The", filter, null);
57 assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
58 assertTermEquals("red", filter, "JJ".getBytes("UTF-8"));
59 assertTermEquals("fox", filter, "NN".getBytes("UTF-8"));
60 assertTermEquals("jumped", filter, "VB".getBytes("UTF-8"));
61 assertTermEquals("over", filter, null);
62 assertTermEquals("the", filter, null);
63 assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8"));
64 assertTermEquals("brown", filter, "JJ".getBytes("UTF-8"));
65 assertTermEquals("dogs", filter, "NN".getBytes("UTF-8"));
66 assertFalse(filter.incrementToken());
70 public void testFloatEncoding() throws Exception {
71 String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
72 DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder());
73 CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
74 PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
75 assertTermEquals("The", filter, termAtt, payAtt, null);
76 assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
77 assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f));
78 assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeFloat(3.5f));
79 assertTermEquals("jumped", filter, termAtt, payAtt, PayloadHelper.encodeFloat(0.5f));
80 assertTermEquals("over", filter, termAtt, payAtt, null);
81 assertTermEquals("the", filter, termAtt, payAtt, null);
82 assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeFloat(5.0f));
83 assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f));
84 assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f));
85 assertFalse(filter.incrementToken());
88 public void testIntEncoding() throws Exception {
89 String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
90 DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
91 CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
92 PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
93 assertTermEquals("The", filter, termAtt, payAtt, null);
94 assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
95 assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
96 assertTermEquals("fox", filter, termAtt, payAtt, PayloadHelper.encodeInt(3));
97 assertTermEquals("jumped", filter, termAtt, payAtt, null);
98 assertTermEquals("over", filter, termAtt, payAtt, null);
99 assertTermEquals("the", filter, termAtt, payAtt, null);
100 assertTermEquals("lazy", filter, termAtt, payAtt, PayloadHelper.encodeInt(5));
101 assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99));
102 assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
103 assertFalse(filter.incrementToken());
106 void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
107 CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
108 PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
110 assertTrue(stream.incrementToken());
111 assertEquals(expected, termAtt.toString());
112 Payload payload = payloadAtt.getPayload();
113 if (payload != null) {
114 assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
115 for (int i = 0; i < expectPay.length; i++) {
116 assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i));
120 assertTrue("expectPay is not null and it should be", expectPay == null);
125 void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
127 assertTrue(stream.incrementToken());
128 assertEquals(expected, termAtt.toString());
129 Payload payload = payAtt.getPayload();
130 if (payload != null) {
131 assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
132 for (int i = 0; i < expectPay.length; i++) {
133 assertTrue(expectPay[i] + " does not equal: " + payload.byteAt(i), expectPay[i] == payload.byteAt(i));
137 assertTrue("expectPay is not null and it should be", expectPay == null);