1 package org.apache.lucene.analysis;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.util.Arrays;
21 import java.util.AbstractMap;
22 import java.util.AbstractSet;
23 import java.util.Iterator;
27 import org.apache.lucene.util.CharacterUtils;
28 import org.apache.lucene.util.Version;
31 * A simple class that stores key Strings as char[]'s in a
32 * hash table. Note that this is not a general purpose
33 * class. For example, it cannot remove items from the
34 * map, nor does it resize its hash table to be smaller,
35 * etc. It is designed to be quick to retrieve items
36 * by char[] keys without the necessity of converting
38 * <p>You must specify the required {@link Version}
39 * compatibility when creating {@link CharArrayMap}:
41 * <li> As of 3.1, supplementary characters are
42 * properly lowercased.</li>
44 * Before 3.1 supplementary characters could not be
45 * lowercased correctly due to the lack of Unicode 4
46 * support in JDK 1.4. To use instances of
47 * {@link CharArrayMap} with the behavior before Lucene
48 * 3.1 pass a {@link Version} < 3.1 to the constructors.
50 public class CharArrayMap<V> extends AbstractMap<Object,V> {
51 // private only because missing generics
52 private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<Object>();
54 private final static int INIT_SIZE = 8;
55 private final CharacterUtils charUtils;
56 private boolean ignoreCase;
58 final Version matchVersion; // package private because used in CharArraySet
59 char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
60 V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
63 * Create map with enough capacity to hold startSize terms
66 * compatibility match version see <a href="#version">Version
67 * note</a> above for details.
69 * the initial capacity
71 * <code>false</code> if and only if the set should be case sensitive
72 * otherwise <code>true</code>.
74 @SuppressWarnings("unchecked")
75 public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
76 this.ignoreCase = ignoreCase;
78 while(startSize + (startSize>>2) > size)
80 keys = new char[size][];
81 values = (V[]) new Object[size];
82 this.charUtils = CharacterUtils.getInstance(matchVersion);
83 this.matchVersion = matchVersion;
87 * Creates a map from the mappings in another map.
90 * compatibility match version see <a href="#version">Version
91 * note</a> above for details.
93 * a map whose mappings to be copied
95 * <code>false</code> if and only if the set should be case sensitive
96 * otherwise <code>true</code>.
98 public CharArrayMap(Version matchVersion, Map<?,? extends V> c, boolean ignoreCase) {
99 this(matchVersion, c.size(), ignoreCase);
103 /** Create set from the supplied map (used internally for readonly maps...) */
104 private CharArrayMap(CharArrayMap<V> toCopy){
105 this.keys = toCopy.keys;
106 this.values = toCopy.values;
107 this.ignoreCase = toCopy.ignoreCase;
108 this.count = toCopy.count;
109 this.charUtils = toCopy.charUtils;
110 this.matchVersion = toCopy.matchVersion;
113 /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
115 public void clear() {
117 Arrays.fill(keys, null);
118 Arrays.fill(values, null);
121 /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
122 * are in the {@link #keySet} */
123 public boolean containsKey(char[] text, int off, int len) {
124 return keys[getSlot(text, off, len)] != null;
127 /** true if the <code>CharSequence</code> is in the {@link #keySet} */
128 public boolean containsKey(CharSequence cs) {
129 return keys[getSlot(cs)] != null;
133 public boolean containsKey(Object o) {
134 if (o instanceof char[]) {
135 final char[] text = (char[])o;
136 return containsKey(text, 0, text.length);
138 return containsKey(o.toString());
141 /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
142 * starting at <code>off</code> */
143 public V get(char[] text, int off, int len) {
144 return values[getSlot(text, off, len)];
147 /** returns the value of the mapping of the chars inside this {@code CharSequence} */
148 public V get(CharSequence cs) {
149 return values[getSlot(cs)];
153 public V get(Object o) {
154 if (o instanceof char[]) {
155 final char[] text = (char[])o;
156 return get(text, 0, text.length);
158 return get(o.toString());
161 private int getSlot(char[] text, int off, int len) {
162 int code = getHashCode(text, off, len);
163 int pos = code & (keys.length-1);
164 char[] text2 = keys[pos];
165 if (text2 != null && !equals(text, off, len, text2)) {
166 final int inc = ((code>>8)+code)|1;
169 pos = code & (keys.length-1);
171 } while (text2 != null && !equals(text, off, len, text2));
176 /** Returns true if the String is in the set */
177 private int getSlot(CharSequence text) {
178 int code = getHashCode(text);
179 int pos = code & (keys.length-1);
180 char[] text2 = keys[pos];
181 if (text2 != null && !equals(text, text2)) {
182 final int inc = ((code>>8)+code)|1;
185 pos = code & (keys.length-1);
187 } while (text2 != null && !equals(text, text2));
192 /** Add the given mapping. */
193 public V put(CharSequence text, V value) {
194 return put(text.toString(), value); // could be more efficient
198 public V put(Object o, V value) {
199 if (o instanceof char[]) {
200 return put((char[])o, value);
202 return put(o.toString(), value);
205 /** Add the given mapping. */
206 public V put(String text, V value) {
207 return put(text.toCharArray(), value);
210 /** Add the given mapping.
211 * If ignoreCase is true for this Set, the text array will be directly modified.
212 * The user should never modify this text array after calling this method.
214 public V put(char[] text, V value) {
216 for(int i=0;i<text.length;){
217 i += Character.toChars(
218 Character.toLowerCase(
219 charUtils.codePointAt(text, i)), text, i);
221 int slot = getSlot(text, 0, text.length);
222 if (keys[slot] != null) {
223 final V oldValue = values[slot];
224 values[slot] = value;
228 values[slot] = value;
231 if (count + (count>>2) > keys.length) {
238 @SuppressWarnings("unchecked")
239 private void rehash() {
240 assert keys.length == values.length;
241 final int newSize = 2*keys.length;
242 final char[][] oldkeys = keys;
243 final V[] oldvalues = values;
244 keys = new char[newSize][];
245 values = (V[]) new Object[newSize];
247 for(int i=0; i<oldkeys.length; i++) {
248 char[] text = oldkeys[i];
250 // todo: could be faster... no need to compare strings on collision
251 final int slot = getSlot(text,0,text.length);
253 values[slot] = oldvalues[i];
258 private boolean equals(char[] text1, int off, int len, char[] text2) {
259 if (len != text2.length)
261 final int limit = off+len;
263 for(int i=0;i<len;) {
264 final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
265 if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
267 i += Character.charCount(codePointAt);
270 for(int i=0;i<len;i++) {
271 if (text1[off+i] != text2[i])
278 private boolean equals(CharSequence text1, char[] text2) {
279 int len = text1.length();
280 if (len != text2.length)
283 for(int i=0;i<len;) {
284 final int codePointAt = charUtils.codePointAt(text1, i);
285 if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
287 i += Character.charCount(codePointAt);
290 for(int i=0;i<len;i++) {
291 if (text1.charAt(i) != text2[i])
298 private int getHashCode(char[] text, int offset, int len) {
300 throw new NullPointerException();
302 final int stop = offset + len;
304 for (int i=offset; i<stop;) {
305 final int codePointAt = charUtils.codePointAt(text, i, stop);
306 code = code*31 + Character.toLowerCase(codePointAt);
307 i += Character.charCount(codePointAt);
310 for (int i=offset; i<stop; i++) {
311 code = code*31 + text[i];
317 private int getHashCode(CharSequence text) {
319 throw new NullPointerException();
321 int len = text.length();
323 for (int i=0; i<len;) {
324 int codePointAt = charUtils.codePointAt(text, i);
325 code = code*31 + Character.toLowerCase(codePointAt);
326 i += Character.charCount(codePointAt);
329 for (int i=0; i<len; i++) {
330 code = code*31 + text.charAt(i);
337 public V remove(Object key) {
338 throw new UnsupportedOperationException();
347 public String toString() {
348 final StringBuilder sb = new StringBuilder("{");
349 for (Map.Entry<Object,V> entry : entrySet()) {
350 if (sb.length()>1) sb.append(", ");
353 return sb.append('}').toString();
356 private EntrySet entrySet = null;
357 private CharArraySet keySet = null;
359 EntrySet createEntrySet() {
360 return new EntrySet(true);
364 public final EntrySet entrySet() {
365 if (entrySet == null) {
366 entrySet = createEntrySet();
371 // helper for CharArraySet to not produce endless recursion
372 final Set<Object> originalKeySet() {
373 return super.keySet();
376 /** Returns an {@link CharArraySet} view on the map's keys.
377 * The set will use the same {@code matchVersion} as this map. */
378 @Override @SuppressWarnings("unchecked")
379 public final CharArraySet keySet() {
380 if (keySet == null) {
381 // prevent adding of entries
382 keySet = new CharArraySet((CharArrayMap) this) {
384 public boolean add(Object o) {
385 throw new UnsupportedOperationException();
388 public boolean add(CharSequence text) {
389 throw new UnsupportedOperationException();
392 public boolean add(String text) {
393 throw new UnsupportedOperationException();
396 public boolean add(char[] text) {
397 throw new UnsupportedOperationException();
404 /** public iterator class so efficient methods are exposed to users */
405 public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
408 private final boolean allowModify;
410 private EntryIterator(boolean allowModify) {
411 this.allowModify = allowModify;
415 private void goNext() {
418 while (pos < keys.length && keys[pos] == null) pos++;
421 public boolean hasNext() {
422 return pos < keys.length;
425 /** gets the next key... do not modify the returned char[] */
426 public char[] nextKey() {
428 return keys[lastPos];
431 /** gets the next key as a newly created String object */
432 public String nextKeyString() {
433 return new String(nextKey());
436 /** returns the value associated with the last key returned */
437 public V currentValue() {
438 return values[lastPos];
441 /** sets the value associated with the last key returned */
442 public V setValue(V value) {
444 throw new UnsupportedOperationException();
445 V old = values[lastPos];
446 values[lastPos] = value;
450 /** use nextCharArray() + currentValue() for better efficiency. */
451 public Map.Entry<Object,V> next() {
453 return new MapEntry(lastPos, allowModify);
456 public void remove() {
457 throw new UnsupportedOperationException();
461 private final class MapEntry implements Map.Entry<Object,V> {
462 private final int pos;
463 private final boolean allowModify;
465 private MapEntry(int pos, boolean allowModify) {
467 this.allowModify = allowModify;
470 public Object getKey() {
471 // we must clone here, as putAll to another CharArrayMap
472 // with other case sensitivity flag would corrupt the keys
473 return keys[pos].clone();
476 public V getValue() {
480 public V setValue(V value) {
482 throw new UnsupportedOperationException();
483 final V old = values[pos];
489 public String toString() {
490 return new StringBuilder().append(keys[pos]).append('=')
491 .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
496 /** public EntrySet class so efficient methods are exposed to users */
497 public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
498 private final boolean allowModify;
500 private EntrySet(boolean allowModify) {
501 this.allowModify = allowModify;
505 public EntryIterator iterator() {
506 return new EntryIterator(allowModify);
510 public boolean contains(Object o) {
511 if (!(o instanceof Map.Entry))
513 final Map.Entry e = (Map.Entry)o;
514 final Object key = e.getKey();
515 final Object val = e.getValue();
516 final Object v = get(key);
517 return v == null ? val == null : v.equals(val);
521 public boolean remove(Object o) {
522 throw new UnsupportedOperationException();
531 public void clear() {
533 throw new UnsupportedOperationException();
534 CharArrayMap.this.clear();
539 * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
540 * unmodifiable views of internal map for "read-only" use.
543 * a map for which the unmodifiable map is returned.
544 * @return an new unmodifiable {@link CharArrayMap}.
545 * @throws NullPointerException
546 * if the given map is <code>null</code>.
548 public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
550 throw new NullPointerException("Given map is null");
551 if (map == emptyMap() || map.isEmpty())
553 if (map instanceof UnmodifiableCharArrayMap)
555 return new UnmodifiableCharArrayMap<V>(map);
559 * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
560 * is a {@link CharArrayMap} the ignoreCase property will be preserved.
562 * <b>Note:</b> If you intend to create a copy of another {@link CharArrayMap} where
563 * the {@link Version} of the source map differs from its copy
564 * {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
565 * The {@link #copy(Version, Map)} will preserve the {@link Version} of the
566 * source map it is an instance of {@link CharArrayMap}.
569 * @param matchVersion
570 * compatibility match version see <a href="#version">Version
571 * note</a> above for details. This argument will be ignored if the
572 * given map is a {@link CharArrayMap}.
575 * @return a copy of the given map as a {@link CharArrayMap}. If the given map
576 * is a {@link CharArrayMap} the ignoreCase property as well as the
577 * matchVersion will be of the given map will be preserved.
579 @SuppressWarnings("unchecked")
580 public static <V> CharArrayMap<V> copy(final Version matchVersion, final Map<?,? extends V> map) {
583 if(map instanceof CharArrayMap) {
584 CharArrayMap<V> m = (CharArrayMap<V>) map;
585 // use fast path instead of iterating all values
586 // this is even on very small sets ~10 times faster than iterating
587 final char[][] keys = new char[m.keys.length][];
588 System.arraycopy(m.keys, 0, keys, 0, keys.length);
589 final V[] values = (V[]) new Object[m.values.length];
590 System.arraycopy(m.values, 0, values, 0, values.length);
591 m = new CharArrayMap<V>(m);
596 return new CharArrayMap<V>(matchVersion, map, false);
599 /** Returns an empty, unmodifiable map. */
600 @SuppressWarnings("unchecked")
601 public static <V> CharArrayMap<V> emptyMap() {
602 return (CharArrayMap<V>) EMPTY_MAP;
605 // package private CharArraySet instanceof check in CharArraySet
606 static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
608 UnmodifiableCharArrayMap(CharArrayMap<V> map) {
613 public void clear() {
614 throw new UnsupportedOperationException();
618 public V put(Object o, V val){
619 throw new UnsupportedOperationException();
623 public V put(char[] text, V val) {
624 throw new UnsupportedOperationException();
628 public V put(CharSequence text, V val) {
629 throw new UnsupportedOperationException();
633 public V put(String text, V val) {
634 throw new UnsupportedOperationException();
638 public V remove(Object key) {
639 throw new UnsupportedOperationException();
643 EntrySet createEntrySet() {
644 return new EntrySet(false);
649 * Empty {@link UnmodifiableCharArrayMap} optimized for speed.
650 * Contains checks will always return <code>false</code> or throw
653 private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
654 EmptyCharArrayMap() {
655 super(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false));
659 public boolean containsKey(char[] text, int off, int len) {
661 throw new NullPointerException();
666 public boolean containsKey(CharSequence cs) {
668 throw new NullPointerException();
673 public boolean containsKey(Object o) {
675 throw new NullPointerException();
680 public V get(char[] text, int off, int len) {
682 throw new NullPointerException();
687 public V get(CharSequence cs) {
689 throw new NullPointerException();
694 public V get(Object o) {
696 throw new NullPointerException();