// Extracted from a git blobdiff (new file, blob a86811a):
// lucene-java-3.5.0/lucene/src/java/org/apache/lucene/analysis/CharArraySet.java
package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.AbstractSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

import org.apache.lucene.util.Version;

/**
 * A simple class that stores Strings as char[]'s in a
 * hash table.  Note that this is not a general purpose
 * class.  For example, it cannot remove items from the
 * set, nor does it resize its hash table to be smaller,
 * etc.  It is designed to be quick to test if a char[]
 * is in the set without the necessity of converting it
 * to a String first.
 *
 * <p>You must specify the required {@link Version}
 * compatibility when creating {@link CharArraySet}:
 *
 * <p>Before 3.1 supplementary characters could not be
 * lowercased correctly due to the lack of Unicode 4
 * support in JDK 1.4. To use instances of
 * {@link CharArraySet} with the behavior before Lucene
 * 3.1 pass a {@link Version} &lt; 3.1 to the constructors.
 *
 * <p><em>Please note:</em> This class implements {@link java.util.Set Set} but
 * does not behave like it should in all cases. The element type is
 * effectively {@code Object}, because you can add any object to it
 * that has a string representation. The add methods will use
 * {@link Object#toString} and store the result using a {@code char[]}
 * buffer. The {@code contains()} methods behave the same way.
 * The {@link #iterator()} returns an {@code Iterator} whose element type
 * depends on the match version (see its documentation).
 * For type safety {@link #stringIterator()} is provided as well.
 */
public class CharArraySet extends AbstractSet {
  public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.emptyMap());

  /** Dummy value stored for every key; only the presence of the key matters. */
  private static final Object PLACEHOLDER = new Object();

  /** Backing map; all set operations are delegated to it. */
  private final CharArrayMap map;

  /**
   * Creates a set with enough capacity to hold {@code startSize} terms.
   *
   * @param matchVersion
   *          compatibility match version; see the {@link Version} note in the
   *          class documentation for details.
   * @param startSize
   *          the initial capacity
   * @param ignoreCase
   *          {@code false} if and only if the set should be case sensitive,
   *          otherwise {@code true}.
   */
  public CharArraySet(Version matchVersion, int startSize, boolean ignoreCase) {
    this(new CharArrayMap(matchVersion, startSize, ignoreCase));
  }

  /**
   * Creates a set from a Collection of objects.
   *
   * @param matchVersion
   *          compatibility match version; see the {@link Version} note in the
   *          class documentation for details.
   * @param c
   *          a collection whose elements are to be placed into the set
   * @param ignoreCase
   *          {@code false} if and only if the set should be case sensitive,
   *          otherwise {@code true}.
   */
  public CharArraySet(Version matchVersion, Collection c, boolean ignoreCase) {
    this(matchVersion, c.size(), ignoreCase);
    addAll(c);
  }

  /**
   * Creates a set with enough capacity to hold {@code startSize} terms,
   * using {@link Version#LUCENE_30} as match version.
   *
   * @param startSize
   *          the initial capacity
   * @param ignoreCase
   *          {@code false} if and only if the set should be case sensitive,
   *          otherwise {@code true}.
   * @deprecated use {@link #CharArraySet(Version, int, boolean)} instead
   */
  @Deprecated
  public CharArraySet(int startSize, boolean ignoreCase) {
    this(Version.LUCENE_30, startSize, ignoreCase);
  }

  /**
   * Creates a set from a Collection of objects, using
   * {@link Version#LUCENE_30} as match version.
   *
   * @param c
   *          a collection whose elements are to be placed into the set
   * @param ignoreCase
   *          {@code false} if and only if the set should be case sensitive,
   *          otherwise {@code true}.
   * @deprecated use {@link #CharArraySet(Version, Collection, boolean)} instead
   */
  @Deprecated
  public CharArraySet(Collection c, boolean ignoreCase) {
    // Same steps as before (size the map from c, then addAll), shared with the versioned constructor.
    this(Version.LUCENE_30, c, ignoreCase);
  }

  /** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
  CharArraySet(final CharArrayMap map) {
    this.map = map;
  }

  /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
  @Override
  public void clear() {
    map.clear();
  }

  /** Returns true if the {@code len} chars of {@code text} starting at {@code off}
   * are in the set. */
  public boolean contains(char[] text, int off, int len) {
    return map.containsKey(text, off, len);
  }

  /** Returns true if the {@link CharSequence} is in the set. */
  public boolean contains(CharSequence cs) {
    return map.containsKey(cs);
  }

  /** Returns true if the backing map contains {@code o} as a key. */
  @Override
  public boolean contains(Object o) {
    return map.containsKey(o);
  }

  /**
   * Adds the given object to the set.
   *
   * @return {@code true} if the backing map reported no previous value for this key
   */
  @Override
  public boolean add(Object o) {
    return map.put(o, PLACEHOLDER) == null;
  }

  /** Add this CharSequence into the set */
  public boolean add(CharSequence text) {
    return map.put(text, PLACEHOLDER) == null;
  }

  /** Add this String into the set */
  public boolean add(String text) {
    return map.put(text, PLACEHOLDER) == null;
  }

  /** Add this char[] directly to the set.
   * If ignoreCase is true for this Set, the text array will be directly modified.
   * The user should never modify this text array after calling this method.
   */
  public boolean add(char[] text) {
    return map.put(text, PLACEHOLDER) == null;
  }

  /** Returns the number of entries in the backing map. */
  @Override
  public int size() {
    return map.size();
  }

  /**
   * Returns an unmodifiable {@link CharArraySet}. This allows to provide
   * unmodifiable views of internal sets for "read-only" use.
   *
   * @param set
   *          a set for which the unmodifiable set is returned.
   * @return a new unmodifiable {@link CharArraySet}, or the given set itself
   *         if it is {@link #EMPTY_SET} or already backed by an unmodifiable map.
   * @throws NullPointerException
   *           if the given set is {@code null}.
   */
  public static CharArraySet unmodifiableSet(CharArraySet set) {
    if (set == null) {
      throw new NullPointerException("Given set is null");
    }
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap) {
      // Already read-only: no need to wrap it a second time.
      return set;
    }
    return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
  }

  /**
   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
   * is a {@link CharArraySet} the ignoreCase property will be preserved.
   *
   * @param set
   *          a set to copy
   * @return a copy of the given set as a {@link CharArraySet}. If the given set
   *         is a {@link CharArraySet} the ignoreCase and matchVersion properties
   *         will be preserved.
   * @deprecated use {@link #copy(Version, Set)} instead.
   */
  @Deprecated
  public static CharArraySet copy(final Set set) {
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    return copy(Version.LUCENE_30, set);
  }

  /**
   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
   * is a {@link CharArraySet} the ignoreCase property will be preserved.
   *
   * <p><b>Note:</b> If you intend to create a copy of another {@link CharArraySet} where
   * the {@link Version} of the source set differs from its copy,
   * {@link #CharArraySet(Version, Collection, boolean)} should be used instead.
   * {@link #copy(Version, Set)} will preserve the {@link Version} of the
   * source set if it is an instance of {@link CharArraySet}.
   *
   * @param matchVersion
   *          compatibility match version; see the {@link Version} note in the
   *          class documentation for details. This argument will be ignored if the
   *          given set is a {@link CharArraySet}.
   * @param set
   *          a set to copy
   * @return a copy of the given set as a {@link CharArraySet}. If the given set
   *         is a {@link CharArraySet}, the ignoreCase property as well as the
   *         matchVersion of the given set will be preserved.
   */
  public static CharArraySet copy(final Version matchVersion, final Set set) {
    if (set == EMPTY_SET) {
      return EMPTY_SET;
    }
    if (set instanceof CharArraySet) {
      final CharArraySet source = (CharArraySet) set;
      // The source's own match version wins over the matchVersion argument.
      return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
    }
    return new CharArraySet(matchVersion, set, false);
  }

  /** The Iterator for this set.  Strings are constructed on the fly, so
   * use {@code nextCharArray} for more efficient access.
   * @deprecated Use the standard iterator, which returns {@code char[]} instances.
   */
  @Deprecated
  public class CharArraySetIterator implements Iterator {
    /** Index into {@code map.keys} of the slot last examined. */
    int pos = -1;
    /** The key that the next call will return, or {@code null} when exhausted. */
    char[] next;

    private CharArraySetIterator() {
      goNext();
    }

    /** Advances {@code pos} to the next non-null slot of {@code map.keys}, if any. */
    private void goNext() {
      next = null;
      pos++;
      while (pos < map.keys.length && (next = map.keys[pos]) == null) {
        pos++;
      }
    }

    public boolean hasNext() {
      return next != null;
    }

    /**
     * Returns the next key and advances the iterator.
     * Do not modify the returned char[].
     *
     * @return the next key, or {@code null} if the iteration has no more elements
     */
    public char[] nextCharArray() {
      char[] ret = next;
      goNext();
      return ret;
    }

    /** Returns the next String, as a Set would...
     * use nextCharArray() for better efficiency.
     *
     * @throws NoSuchElementException if the iteration has no more elements
     */
    public String next() {
      final char[] chars = nextCharArray();
      if (chars == null) {
        // Honor the Iterator contract; new String((char[]) null) would throw NullPointerException.
        throw new NoSuchElementException();
      }
      return new String(chars);
    }

    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /** Returns an iterator of newly allocated Strings (an instance of {@link CharArraySetIterator}).
   * @deprecated Use {@link #iterator}, which returns {@code char[]} instances.
   */
  @Deprecated
  public Iterator stringIterator() {
    return new CharArraySetIterator();
  }

  /** Returns an {@link Iterator} depending on the version used:
   * <ul>
   *   <li>if {@code matchVersion} &ge; 3.1, it returns {@code char[]} instances in this set.</li>
   *   <li>if {@code matchVersion} is 3.0 or older, it returns newly
   *   allocated Strings, so this method violates the Set interface.
   *   It is kept this way for backwards compatibility, normally it should
   *   return {@code char[]} on {@code next()}</li>
   * </ul>
   */
  @Override @SuppressWarnings("unchecked")
  public Iterator iterator() {
    // Use the map's originalKeySet() iterator (to not produce endless recursion).
    // NOTE(review): the original comment named "AbstractSet#keySet()", which does not
    // exist; presumably the key set inherited by CharArrayMap was meant - confirm there.
    return map.matchVersion.onOrAfter(Version.LUCENE_31) ?
      map.originalKeySet().iterator() : (Iterator) stringIterator();
  }

  /** Returns the elements as {@code [a, b, c]}; {@code char[]} items are appended as text. */
  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("[");
    for (Object item : this) {
      if (sb.length() > 1) {
        // Something besides the opening bracket is present, so separate the items.
        sb.append(", ");
      }
      if (item instanceof char[]) {
        sb.append((char[]) item);
      } else {
        sb.append(item);
      }
    }
    return sb.append(']').toString();
  }
}