1 package org.apache.lucene.analysis;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 import org.apache.lucene.index.Payload;
21 import org.apache.lucene.analysis.tokenattributes.*;
22 import org.apache.lucene.util.LuceneTestCase;
23 import org.apache.lucene.util.Attribute;
24 import org.apache.lucene.util.AttributeImpl;
25 import org.apache.lucene.util._TestUtil;
27 import java.io.StringReader;
28 import java.util.HashMap;
30 public class TestToken extends LuceneTestCase {
32 public void testCtor() throws Exception {
33 Token t = new Token();
34 char[] content = "hello".toCharArray();
35 t.setTermBuffer(content, 0, content.length);
36 assertNotSame(t.termBuffer(), content);
37 assertEquals(0, t.startOffset());
38 assertEquals(0, t.endOffset());
39 assertEquals("hello", t.term());
40 assertEquals("word", t.type());
41 assertEquals(0, t.getFlags());
44 t.setTermBuffer(content, 0, content.length);
45 assertEquals("hello", t.term());
46 assertEquals("hello", t.toString());
47 assertEquals(6, t.startOffset());
48 assertEquals(22, t.endOffset());
49 assertEquals("word", t.type());
50 assertEquals(0, t.getFlags());
52 t = new Token(6, 22, 7);
53 t.setTermBuffer(content, 0, content.length);
54 assertEquals("hello", t.term());
55 assertEquals("hello", t.toString());
56 assertEquals(6, t.startOffset());
57 assertEquals(22, t.endOffset());
58 assertEquals("word", t.type());
59 assertEquals(7, t.getFlags());
61 t = new Token(6, 22, "junk");
62 t.setTermBuffer(content, 0, content.length);
63 assertEquals("hello", t.term());
64 assertEquals("hello", t.toString());
65 assertEquals(6, t.startOffset());
66 assertEquals(22, t.endOffset());
67 assertEquals("junk", t.type());
68 assertEquals(0, t.getFlags());
71 public void testResize() {
72 Token t = new Token();
73 char[] content = "hello".toCharArray();
74 t.setTermBuffer(content, 0, content.length);
75 for (int i = 0; i < 2000; i++)
77 t.resizeTermBuffer(i);
78 assertTrue(i <= t.termBuffer().length);
79 assertEquals("hello", t.term());
83 public void testGrow() {
84 Token t = new Token();
85 StringBuilder buf = new StringBuilder("ab");
86 for (int i = 0; i < 20; i++)
88 char[] content = buf.toString().toCharArray();
89 t.setTermBuffer(content, 0, content.length);
90 assertEquals(buf.length(), t.termLength());
91 assertEquals(buf.toString(), t.term());
92 buf.append(buf.toString());
94 assertEquals(1048576, t.termLength());
96 // now as a string, first variant
98 buf = new StringBuilder("ab");
99 for (int i = 0; i < 20; i++)
101 String content = buf.toString();
102 t.setTermBuffer(content, 0, content.length());
103 assertEquals(content.length(), t.termLength());
104 assertEquals(content, t.term());
107 assertEquals(1048576, t.termLength());
109 // now as a string, second variant
111 buf = new StringBuilder("ab");
112 for (int i = 0; i < 20; i++)
114 String content = buf.toString();
115 t.setTermBuffer(content);
116 assertEquals(content.length(), t.termLength());
117 assertEquals(content, t.term());
120 assertEquals(1048576, t.termLength());
122 // Test for slow growth to a long term
124 buf = new StringBuilder("a");
125 for (int i = 0; i < 20000; i++)
127 String content = buf.toString();
128 t.setTermBuffer(content);
129 assertEquals(content.length(), t.termLength());
130 assertEquals(content, t.term());
133 assertEquals(20000, t.termLength());
135 // Test for slow growth to a long term
137 buf = new StringBuilder("a");
138 for (int i = 0; i < 20000; i++)
140 String content = buf.toString();
141 t.setTermBuffer(content);
142 assertEquals(content.length(), t.termLength());
143 assertEquals(content, t.term());
146 assertEquals(20000, t.termLength());
149 public void testToString() throws Exception {
150 char[] b = {'a', 'l', 'o', 'h', 'a'};
151 Token t = new Token("", 0, 5);
152 t.setTermBuffer(b, 0, 5);
153 assertEquals("aloha", t.toString());
155 t.setTermBuffer("hi there");
156 assertEquals("hi there", t.toString());
159 public void testTermBufferEquals() throws Exception {
160 Token t1a = new Token();
161 char[] content1a = "hello".toCharArray();
162 t1a.setTermBuffer(content1a, 0, 5);
163 Token t1b = new Token();
164 char[] content1b = "hello".toCharArray();
165 t1b.setTermBuffer(content1b, 0, 5);
166 Token t2 = new Token();
167 char[] content2 = "hello2".toCharArray();
168 t2.setTermBuffer(content2, 0, 6);
169 assertTrue(t1a.equals(t1b));
170 assertFalse(t1a.equals(t2));
171 assertFalse(t2.equals(t1b));
174 public void testMixedStringArray() throws Exception {
175 Token t = new Token("hello", 0, 5);
176 assertEquals(t.termLength(), 5);
177 assertEquals(t.term(), "hello");
178 t.setTermBuffer("hello2");
179 assertEquals(t.termLength(), 6);
180 assertEquals(t.term(), "hello2");
181 t.setTermBuffer("hello3".toCharArray(), 0, 6);
182 assertEquals(t.term(), "hello3");
184 char[] buffer = t.termBuffer();
186 assertEquals(t.term(), "hollo3");
189 public void testClone() throws Exception {
190 Token t = new Token(0, 5);
191 char[] content = "hello".toCharArray();
192 t.setTermBuffer(content, 0, 5);
193 char[] buf = t.termBuffer();
194 Token copy = (Token) TestSimpleAttributeImpls.assertCloneIsEqual(t);
195 assertEquals(t.term(), copy.term());
196 assertNotSame(buf, copy.termBuffer());
198 Payload pl = new Payload(new byte[]{1,2,3,4});
200 copy = (Token) TestSimpleAttributeImpls.assertCloneIsEqual(t);
201 assertEquals(pl, copy.getPayload());
202 assertNotSame(pl, copy.getPayload());
205 public void testCopyTo() throws Exception {
206 Token t = new Token();
207 Token copy = (Token) TestSimpleAttributeImpls.assertCopyIsEqual(t);
208 assertEquals("", t.term());
209 assertEquals("", copy.term());
212 char[] content = "hello".toCharArray();
213 t.setTermBuffer(content, 0, 5);
214 char[] buf = t.termBuffer();
215 copy = (Token) TestSimpleAttributeImpls.assertCopyIsEqual(t);
216 assertEquals(t.term(), copy.term());
217 assertNotSame(buf, copy.termBuffer());
219 Payload pl = new Payload(new byte[]{1,2,3,4});
221 copy = (Token) TestSimpleAttributeImpls.assertCopyIsEqual(t);
222 assertEquals(pl, copy.getPayload());
223 assertNotSame(pl, copy.getPayload());
226 public interface SenselessAttribute extends Attribute {}
228 public static final class SenselessAttributeImpl extends AttributeImpl implements SenselessAttribute {
230 public void copyTo(AttributeImpl target) {}
232 public void clear() {}
234 public boolean equals(Object o) { return (o instanceof SenselessAttributeImpl); }
236 public int hashCode() { return 0; }
239 public void testTokenAttributeFactory() throws Exception {
240 TokenStream ts = new WhitespaceTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"));
242 assertTrue("SenselessAttribute is not implemented by SenselessAttributeImpl",
243 ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);
245 assertTrue("CharTermAttribute is not implemented by Token",
246 ts.addAttribute(CharTermAttribute.class) instanceof Token);
247 assertTrue("OffsetAttribute is not implemented by Token",
248 ts.addAttribute(OffsetAttribute.class) instanceof Token);
249 assertTrue("FlagsAttribute is not implemented by Token",
250 ts.addAttribute(FlagsAttribute.class) instanceof Token);
251 assertTrue("PayloadAttribute is not implemented by Token",
252 ts.addAttribute(PayloadAttribute.class) instanceof Token);
253 assertTrue("PositionIncrementAttribute is not implemented by Token",
254 ts.addAttribute(PositionIncrementAttribute.class) instanceof Token);
255 assertTrue("TypeAttribute is not implemented by Token",
256 ts.addAttribute(TypeAttribute.class) instanceof Token);
259 public void testAttributeReflection() throws Exception {
260 Token t = new Token("foobar", 6, 22, 8);
261 _TestUtil.assertAttributeReflection(t,
262 new HashMap<String,Object>() {{
263 put(CharTermAttribute.class.getName() + "#term", "foobar");
264 put(OffsetAttribute.class.getName() + "#startOffset", 6);
265 put(OffsetAttribute.class.getName() + "#endOffset", 22);
266 put(PositionIncrementAttribute.class.getName() + "#positionIncrement", 1);
267 put(PayloadAttribute.class.getName() + "#payload", null);
268 put(TypeAttribute.class.getName() + "#type", TypeAttribute.DEFAULT_TYPE);
269 put(FlagsAttribute.class.getName() + "#flags", 8);