--- /dev/null
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;
+
+/**
+ * A simple class that stores key Strings as char[]'s in a
+ * hash table. Note that this is not a general purpose
+ * class. For example, it cannot remove items from the
+ * map, nor does it resize its hash table to be smaller,
+ * etc. It is designed to be quick to retrieve items
+ * by char[] keys without the necessity of converting
+ * to a String first.
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating {@link CharArrayMap}:
+ * <ul>
+ * <li> As of 3.1, supplementary characters are
+ * properly lowercased.</li>
+ * </ul>
+ * Before 3.1 supplementary characters could not be
+ * lowercased correctly due to the lack of Unicode 4
+ * support in JDK 1.4. To use instances of
+ * {@link CharArrayMap} with the behavior before Lucene
+ * 3.1 pass a {@link Version} < 3.1 to the constructors.
+ */
+public class CharArrayMap<V> extends AbstractMap<Object,V> {
+ // private only because missing generics
+ private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<Object>();
+
+ private final static int INIT_SIZE = 8;
+ private final CharacterUtils charUtils;
+ private boolean ignoreCase;
+ private int count;
+ final Version matchVersion; // package private because used in CharArraySet
+ char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+ V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+
+ /**
+ * Create map with enough capacity to hold startSize terms
+ *
+ * @param matchVersion
+ * compatibility match version see <a href="#version">Version
+ * note</a> above for details.
+ * @param startSize
+ * the initial capacity
+ * @param ignoreCase
+ * <code>false</code> if and only if the set should be case sensitive
+ * otherwise <code>true</code>.
+ */
+ @SuppressWarnings("unchecked")
+ public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
+ this.ignoreCase = ignoreCase;
+ int size = INIT_SIZE;
+ while(startSize + (startSize>>2) > size)
+ size <<= 1;
+ keys = new char[size][];
+ values = (V[]) new Object[size];
+ this.charUtils = CharacterUtils.getInstance(matchVersion);
+ this.matchVersion = matchVersion;
+ }
+
+ /**
+ * Creates a map from the mappings in another map.
+ *
+ * @param matchVersion
+ * compatibility match version see <a href="#version">Version
+ * note</a> above for details.
+ * @param c
+ * a map whose mappings to be copied
+ * @param ignoreCase
+ * <code>false</code> if and only if the set should be case sensitive
+ * otherwise <code>true</code>.
+ */
+ public CharArrayMap(Version matchVersion, Map<?,? extends V> c, boolean ignoreCase) {
+ this(matchVersion, c.size(), ignoreCase);
+ putAll(c);
+ }
+
+ /** Create set from the supplied map (used internally for readonly maps...) */
+ private CharArrayMap(CharArrayMap<V> toCopy){
+ this.keys = toCopy.keys;
+ this.values = toCopy.values;
+ this.ignoreCase = toCopy.ignoreCase;
+ this.count = toCopy.count;
+ this.charUtils = toCopy.charUtils;
+ this.matchVersion = toCopy.matchVersion;
+ }
+
+ /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
+ @Override
+ public void clear() {
+ count = 0;
+ Arrays.fill(keys, null);
+ Arrays.fill(values, null);
+ }
+
+ /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
+ * are in the {@link #keySet} */
+ public boolean containsKey(char[] text, int off, int len) {
+ return keys[getSlot(text, off, len)] != null;
+ }
+
+ /** true if the <code>CharSequence</code> is in the {@link #keySet} */
+ public boolean containsKey(CharSequence cs) {
+ return keys[getSlot(cs)] != null;
+ }
+
+ @Override
+ public boolean containsKey(Object o) {
+ if (o instanceof char[]) {
+ final char[] text = (char[])o;
+ return containsKey(text, 0, text.length);
+ }
+ return containsKey(o.toString());
+ }
+
+ /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
+ * starting at <code>off</code> */
+ public V get(char[] text, int off, int len) {
+ return values[getSlot(text, off, len)];
+ }
+
+ /** returns the value of the mapping of the chars inside this {@code CharSequence} */
+ public V get(CharSequence cs) {
+ return values[getSlot(cs)];
+ }
+
+ @Override
+ public V get(Object o) {
+ if (o instanceof char[]) {
+ final char[] text = (char[])o;
+ return get(text, 0, text.length);
+ }
+ return get(o.toString());
+ }
+
+ private int getSlot(char[] text, int off, int len) {
+ int code = getHashCode(text, off, len);
+ int pos = code & (keys.length-1);
+ char[] text2 = keys[pos];
+ if (text2 != null && !equals(text, off, len, text2)) {
+ final int inc = ((code>>8)+code)|1;
+ do {
+ code += inc;
+ pos = code & (keys.length-1);
+ text2 = keys[pos];
+ } while (text2 != null && !equals(text, off, len, text2));
+ }
+ return pos;
+ }
+
+ /** Returns true if the String is in the set */
+ private int getSlot(CharSequence text) {
+ int code = getHashCode(text);
+ int pos = code & (keys.length-1);
+ char[] text2 = keys[pos];
+ if (text2 != null && !equals(text, text2)) {
+ final int inc = ((code>>8)+code)|1;
+ do {
+ code += inc;
+ pos = code & (keys.length-1);
+ text2 = keys[pos];
+ } while (text2 != null && !equals(text, text2));
+ }
+ return pos;
+ }
+
+ /** Add the given mapping. */
+ public V put(CharSequence text, V value) {
+ return put(text.toString(), value); // could be more efficient
+ }
+
+ @Override
+ public V put(Object o, V value) {
+ if (o instanceof char[]) {
+ return put((char[])o, value);
+ }
+ return put(o.toString(), value);
+ }
+
+ /** Add the given mapping. */
+ public V put(String text, V value) {
+ return put(text.toCharArray(), value);
+ }
+
+ /** Add the given mapping.
+ * If ignoreCase is true for this Set, the text array will be directly modified.
+ * The user should never modify this text array after calling this method.
+ */
+ public V put(char[] text, V value) {
+ if (ignoreCase)
+ for(int i=0;i<text.length;){
+ i += Character.toChars(
+ Character.toLowerCase(
+ charUtils.codePointAt(text, i)), text, i);
+ }
+ int slot = getSlot(text, 0, text.length);
+ if (keys[slot] != null) {
+ final V oldValue = values[slot];
+ values[slot] = value;
+ return oldValue;
+ }
+ keys[slot] = text;
+ values[slot] = value;
+ count++;
+
+ if (count + (count>>2) > keys.length) {
+ rehash();
+ }
+
+ return null;
+ }
+
+ @SuppressWarnings("unchecked")
+ private void rehash() {
+ assert keys.length == values.length;
+ final int newSize = 2*keys.length;
+ final char[][] oldkeys = keys;
+ final V[] oldvalues = values;
+ keys = new char[newSize][];
+ values = (V[]) new Object[newSize];
+
+ for(int i=0; i<oldkeys.length; i++) {
+ char[] text = oldkeys[i];
+ if (text != null) {
+ // todo: could be faster... no need to compare strings on collision
+ final int slot = getSlot(text,0,text.length);
+ keys[slot] = text;
+ values[slot] = oldvalues[i];
+ }
+ }
+ }
+
+ private boolean equals(char[] text1, int off, int len, char[] text2) {
+ if (len != text2.length)
+ return false;
+ final int limit = off+len;
+ if (ignoreCase) {
+ for(int i=0;i<len;) {
+ final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
+ if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
+ return false;
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for(int i=0;i<len;i++) {
+ if (text1[off+i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean equals(CharSequence text1, char[] text2) {
+ int len = text1.length();
+ if (len != text2.length)
+ return false;
+ if (ignoreCase) {
+ for(int i=0;i<len;) {
+ final int codePointAt = charUtils.codePointAt(text1, i);
+ if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i))
+ return false;
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for(int i=0;i<len;i++) {
+ if (text1.charAt(i) != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private int getHashCode(char[] text, int offset, int len) {
+ if (text == null)
+ throw new NullPointerException();
+ int code = 0;
+ final int stop = offset + len;
+ if (ignoreCase) {
+ for (int i=offset; i<stop;) {
+ final int codePointAt = charUtils.codePointAt(text, i, stop);
+ code = code*31 + Character.toLowerCase(codePointAt);
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for (int i=offset; i<stop; i++) {
+ code = code*31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ private int getHashCode(CharSequence text) {
+ if (text == null)
+ throw new NullPointerException();
+ int code = 0;
+ int len = text.length();
+ if (ignoreCase) {
+ for (int i=0; i<len;) {
+ int codePointAt = charUtils.codePointAt(text, i);
+ code = code*31 + Character.toLowerCase(codePointAt);
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for (int i=0; i<len; i++) {
+ code = code*31 + text.charAt(i);
+ }
+ }
+ return code;
+ }
+
+ @Override
+ public V remove(Object key) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int size() {
+ return count;
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("{");
+ for (Map.Entry<Object,V> entry : entrySet()) {
+ if (sb.length()>1) sb.append(", ");
+ sb.append(entry);
+ }
+ return sb.append('}').toString();
+ }
+
+ private EntrySet entrySet = null;
+ private CharArraySet keySet = null;
+
+ EntrySet createEntrySet() {
+ return new EntrySet(true);
+ }
+
+ @Override
+ public final EntrySet entrySet() {
+ if (entrySet == null) {
+ entrySet = createEntrySet();
+ }
+ return entrySet;
+ }
+
+ // helper for CharArraySet to not produce endless recursion
+ final Set<Object> originalKeySet() {
+ return super.keySet();
+ }
+
+ /** Returns an {@link CharArraySet} view on the map's keys.
+ * The set will use the same {@code matchVersion} as this map. */
+ @Override @SuppressWarnings("unchecked")
+ public final CharArraySet keySet() {
+ if (keySet == null) {
+ // prevent adding of entries
+ keySet = new CharArraySet((CharArrayMap) this) {
+ @Override
+ public boolean add(Object o) {
+ throw new UnsupportedOperationException();
+ }
+ @Override
+ public boolean add(CharSequence text) {
+ throw new UnsupportedOperationException();
+ }
+ @Override
+ public boolean add(String text) {
+ throw new UnsupportedOperationException();
+ }
+ @Override
+ public boolean add(char[] text) {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ return keySet;
+ }
+
+ /** public iterator class so efficient methods are exposed to users */
+ public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
+ private int pos=-1;
+ private int lastPos;
+ private final boolean allowModify;
+
+ private EntryIterator(boolean allowModify) {
+ this.allowModify = allowModify;
+ goNext();
+ }
+
+ private void goNext() {
+ lastPos = pos;
+ pos++;
+ while (pos < keys.length && keys[pos] == null) pos++;
+ }
+
+ public boolean hasNext() {
+ return pos < keys.length;
+ }
+
+ /** gets the next key... do not modify the returned char[] */
+ public char[] nextKey() {
+ goNext();
+ return keys[lastPos];
+ }
+
+ /** gets the next key as a newly created String object */
+ public String nextKeyString() {
+ return new String(nextKey());
+ }
+
+ /** returns the value associated with the last key returned */
+ public V currentValue() {
+ return values[lastPos];
+ }
+
+ /** sets the value associated with the last key returned */
+ public V setValue(V value) {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ V old = values[lastPos];
+ values[lastPos] = value;
+ return old;
+ }
+
+ /** use nextCharArray() + currentValue() for better efficiency. */
+ public Map.Entry<Object,V> next() {
+ goNext();
+ return new MapEntry(lastPos, allowModify);
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ private final class MapEntry implements Map.Entry<Object,V> {
+ private final int pos;
+ private final boolean allowModify;
+
+ private MapEntry(int pos, boolean allowModify) {
+ this.pos = pos;
+ this.allowModify = allowModify;
+ }
+
+ public Object getKey() {
+ // we must clone here, as putAll to another CharArrayMap
+ // with other case sensitivity flag would corrupt the keys
+ return keys[pos].clone();
+ }
+
+ public V getValue() {
+ return values[pos];
+ }
+
+ public V setValue(V value) {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ final V old = values[pos];
+ values[pos] = value;
+ return old;
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder().append(keys[pos]).append('=')
+ .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
+ .toString();
+ }
+ }
+
+ /** public EntrySet class so efficient methods are exposed to users */
+ public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
+ private final boolean allowModify;
+
+ private EntrySet(boolean allowModify) {
+ this.allowModify = allowModify;
+ }
+
+ @Override
+ public EntryIterator iterator() {
+ return new EntryIterator(allowModify);
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ if (!(o instanceof Map.Entry))
+ return false;
+ final Map.Entry e = (Map.Entry)o;
+ final Object key = e.getKey();
+ final Object val = e.getValue();
+ final Object v = get(key);
+ return v == null ? val == null : v.equals(val);
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int size() {
+ return count;
+ }
+
+ @Override
+ public void clear() {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ CharArrayMap.this.clear();
+ }
+ }
+
+ /**
+ * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
+ * unmodifiable views of internal map for "read-only" use.
+ *
+ * @param map
+ * a map for which the unmodifiable map is returned.
+ * @return an new unmodifiable {@link CharArrayMap}.
+ * @throws NullPointerException
+ * if the given map is <code>null</code>.
+ */
+ public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
+ if (map == null)
+ throw new NullPointerException("Given map is null");
+ if (map == emptyMap() || map.isEmpty())
+ return emptyMap();
+ if (map instanceof UnmodifiableCharArrayMap)
+ return map;
+ return new UnmodifiableCharArrayMap<V>(map);
+ }
+
+ /**
+ * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
+ * is a {@link CharArrayMap} the ignoreCase property will be preserved.
+ * <p>
+ * <b>Note:</b> If you intend to create a copy of another {@link CharArrayMap} where
+ * the {@link Version} of the source map differs from its copy
+ * {@link #CharArrayMap(Version, Map, boolean)} should be used instead.
+ * The {@link #copy(Version, Map)} will preserve the {@link Version} of the
+ * source map it is an instance of {@link CharArrayMap}.
+ * </p>
+ *
+ * @param matchVersion
+ * compatibility match version see <a href="#version">Version
+ * note</a> above for details. This argument will be ignored if the
+ * given map is a {@link CharArrayMap}.
+ * @param map
+ * a map to copy
+ * @return a copy of the given map as a {@link CharArrayMap}. If the given map
+ * is a {@link CharArrayMap} the ignoreCase property as well as the
+ * matchVersion will be of the given map will be preserved.
+ */
+ @SuppressWarnings("unchecked")
+ public static <V> CharArrayMap<V> copy(final Version matchVersion, final Map<?,? extends V> map) {
+ if(map == EMPTY_MAP)
+ return emptyMap();
+ if(map instanceof CharArrayMap) {
+ CharArrayMap<V> m = (CharArrayMap<V>) map;
+ // use fast path instead of iterating all values
+ // this is even on very small sets ~10 times faster than iterating
+ final char[][] keys = new char[m.keys.length][];
+ System.arraycopy(m.keys, 0, keys, 0, keys.length);
+ final V[] values = (V[]) new Object[m.values.length];
+ System.arraycopy(m.values, 0, values, 0, values.length);
+ m = new CharArrayMap<V>(m);
+ m.keys = keys;
+ m.values = values;
+ return m;
+ }
+ return new CharArrayMap<V>(matchVersion, map, false);
+ }
+
+ /** Returns an empty, unmodifiable map. */
+ @SuppressWarnings("unchecked")
+ public static <V> CharArrayMap<V> emptyMap() {
+ return (CharArrayMap<V>) EMPTY_MAP;
+ }
+
+ // package private CharArraySet instanceof check in CharArraySet
+ static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
+
+ UnmodifiableCharArrayMap(CharArrayMap<V> map) {
+ super(map);
+ }
+
+ @Override
+ public void clear() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public V put(Object o, V val){
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public V put(char[] text, V val) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public V put(CharSequence text, V val) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public V put(String text, V val) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public V remove(Object key) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ EntrySet createEntrySet() {
+ return new EntrySet(false);
+ }
+ }
+
+ /**
+ * Empty {@link UnmodifiableCharArrayMap} optimized for speed.
+ * Contains checks will always return <code>false</code> or throw
+ * NPE if necessary.
+ */
+ private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
+ EmptyCharArrayMap() {
+ super(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false));
+ }
+
+ @Override
+ public boolean containsKey(char[] text, int off, int len) {
+ if(text == null)
+ throw new NullPointerException();
+ return false;
+ }
+
+ @Override
+ public boolean containsKey(CharSequence cs) {
+ if(cs == null)
+ throw new NullPointerException();
+ return false;
+ }
+
+ @Override
+ public boolean containsKey(Object o) {
+ if(o == null)
+ throw new NullPointerException();
+ return false;
+ }
+
+ @Override
+ public V get(char[] text, int off, int len) {
+ if(text == null)
+ throw new NullPointerException();
+ return null;
+ }
+
+ @Override
+ public V get(CharSequence cs) {
+ if(cs == null)
+ throw new NullPointerException();
+ return null;
+ }
+
+ @Override
+ public V get(Object o) {
+ if(o == null)
+ throw new NullPointerException();
+ return null;
+ }
+ }
+}