1 package org.apache.lucene.analysis.tokenattributes;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.Serializable;
21 import java.nio.CharBuffer;
23 import org.apache.lucene.util.ArrayUtil;
24 import org.apache.lucene.util.AttributeImpl;
25 import org.apache.lucene.util.AttributeReflector;
26 import org.apache.lucene.util.RamUsageEstimator;
29 * The term text of a Token.
31 public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermAttribute, Cloneable, Serializable {
32 private static int MIN_BUFFER_SIZE = 10;
34 private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
35 private int termLength = 0;
38 public String term() {
39 // don't delegate to toString() here!
40 return new String(termBuffer, 0, termLength);
43 public final void copyBuffer(char[] buffer, int offset, int length) {
44 growTermBuffer(length);
45 System.arraycopy(buffer, offset, termBuffer, 0, length);
50 public void setTermBuffer(char[] buffer, int offset, int length) {
51 copyBuffer(buffer, offset, length);
55 public void setTermBuffer(String buffer) {
56 int length = buffer.length();
57 growTermBuffer(length);
58 buffer.getChars(0, length, termBuffer, 0);
63 public void setTermBuffer(String buffer, int offset, int length) {
64 assert offset <= buffer.length();
65 assert offset + length <= buffer.length();
66 growTermBuffer(length);
67 buffer.getChars(offset, offset + length, termBuffer, 0);
71 public final char[] buffer() {
76 public char[] termBuffer() {
80 public final char[] resizeBuffer(int newSize) {
81 if(termBuffer.length < newSize){
82 // Not big enough; create a new array with slight
83 // over allocation and preserve content
84 final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
85 System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
86 termBuffer = newCharBuffer;
92 public char[] resizeTermBuffer(int newSize) {
93 return resizeBuffer(newSize);
96 private void growTermBuffer(int newSize) {
97 if(termBuffer.length < newSize){
98 // Not big enough; create a new array with slight
100 termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
105 public int termLength() {
109 public final CharTermAttribute setLength(int length) {
110 if (length > termBuffer.length)
111 throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
116 public final CharTermAttribute setEmpty() {
122 public void setTermLength(int length) {
126 // *** CharSequence interface ***
127 public final int length() {
131 public final char charAt(int index) {
132 if (index >= termLength)
133 throw new IndexOutOfBoundsException();
134 return termBuffer[index];
137 public final CharSequence subSequence(final int start, final int end) {
138 if (start > termLength || end > termLength)
139 throw new IndexOutOfBoundsException();
140 return new String(termBuffer, start, end - start);
143 // *** Appendable interface ***
145 public final CharTermAttribute append(CharSequence csq) {
146 if (csq == null) // needed for Appendable compliance
148 return append(csq, 0, csq.length());
151 public final CharTermAttribute append(CharSequence csq, int start, int end) {
152 if (csq == null) // needed for Appendable compliance
154 final int len = end - start, csqlen = csq.length();
155 if (len < 0 || start > csqlen || end > csqlen)
156 throw new IndexOutOfBoundsException();
159 resizeBuffer(termLength + len);
160 if (len > 4) { // only use instanceof check series for longer CSQs, else simply iterate
161 if (csq instanceof String) {
162 ((String) csq).getChars(start, end, termBuffer, termLength);
163 } else if (csq instanceof StringBuilder) {
164 ((StringBuilder) csq).getChars(start, end, termBuffer, termLength);
165 } else if (csq instanceof CharTermAttribute) {
166 System.arraycopy(((CharTermAttribute) csq).buffer(), start, termBuffer, termLength, len);
167 } else if (csq instanceof CharBuffer && ((CharBuffer) csq).hasArray()) {
168 final CharBuffer cb = (CharBuffer) csq;
169 System.arraycopy(cb.array(), cb.arrayOffset() + cb.position() + start, termBuffer, termLength, len);
170 } else if (csq instanceof StringBuffer) {
171 ((StringBuffer) csq).getChars(start, end, termBuffer, termLength);
174 termBuffer[termLength++] = csq.charAt(start++);
175 // no fall-through here, as termLength is updated!
182 termBuffer[termLength++] = csq.charAt(start++);
187 public final CharTermAttribute append(char c) {
188 resizeBuffer(termLength + 1)[termLength++] = c;
192 // *** For performance some convenience methods in addition to CSQ's ***
194 public final CharTermAttribute append(String s) {
195 if (s == null) // needed for Appendable compliance
197 final int len = s.length();
198 s.getChars(0, len, resizeBuffer(termLength + len), termLength);
203 public final CharTermAttribute append(StringBuilder s) {
204 if (s == null) // needed for Appendable compliance
206 final int len = s.length();
207 s.getChars(0, len, resizeBuffer(termLength + len), termLength);
212 public final CharTermAttribute append(CharTermAttribute ta) {
213 if (ta == null) // needed for Appendable compliance
215 final int len = ta.length();
216 System.arraycopy(ta.buffer(), 0, resizeBuffer(termLength + len), termLength, len);
221 private CharTermAttribute appendNull() {
222 resizeBuffer(termLength + 4);
223 termBuffer[termLength++] = 'n';
224 termBuffer[termLength++] = 'u';
225 termBuffer[termLength++] = 'l';
226 termBuffer[termLength++] = 'l';
230 // *** AttributeImpl ***
233 public int hashCode() {
234 int code = termLength;
235 code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
240 public void clear() {
245 public Object clone() {
246 CharTermAttributeImpl t = (CharTermAttributeImpl)super.clone();
248 t.termBuffer = new char[this.termLength];
249 System.arraycopy(this.termBuffer, 0, t.termBuffer, 0, this.termLength);
254 public boolean equals(Object other) {
259 if (other instanceof CharTermAttributeImpl) {
260 final CharTermAttributeImpl o = ((CharTermAttributeImpl) other);
261 if (termLength != o.termLength)
263 for(int i=0;i<termLength;i++) {
264 if (termBuffer[i] != o.termBuffer[i]) {
275 * Returns solely the term text as specified by the
276 * {@link CharSequence} interface.
277 * <p>This method changed the behavior with Lucene 3.1,
278 * before it returned a String representation of the whole
279 * term with all attributes.
280 * This affects especially the
281 * {@link org.apache.lucene.analysis.Token} subclass.
284 public String toString() {
285 // CharSequence requires that only the contents are returned, but this is orginal code: "term=" + new String(termBuffer, 0, termLength)
286 return new String(termBuffer, 0, termLength);
290 public void reflectWith(AttributeReflector reflector) {
291 reflector.reflect(CharTermAttribute.class, "term", toString());
295 public void copyTo(AttributeImpl target) {
296 if (target instanceof CharTermAttribute) {
297 CharTermAttribute t = (CharTermAttribute) target;
298 t.copyBuffer(termBuffer, 0, termLength);
300 TermAttribute t = (TermAttribute) target;
301 t.setTermBuffer(termBuffer, 0, termLength);