+++ /dev/null
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.Iterator;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.Version;
-
-
-public class TestCharArraySet extends LuceneTestCase {
-
- /** Sample terms that the tests in this class add to and look up in the sets under test. */
- static final String[] TEST_STOP_WORDS = {
-   "a", "an", "and", "are", "as", "at",
-   "be", "but", "by", "for",
-   "if", "in", "into", "is", "it",
-   "no", "not", "of", "on", "or",
-   "such", "that", "the", "their", "then", "there", "these", "they", "this", "to",
-   "was", "will", "with"
- };
-
-
- /**
-  * Creates a set with an initial size argument of 0, adds every stop word and
-  * checks that the resulting size and membership are as expected.
-  */
- public void testRehash() throws Exception {
-   CharArraySet stopSet = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
-   for (String word : TEST_STOP_WORDS) {
-     stopSet.add(word);
-   }
-   assertEquals(TEST_STOP_WORDS.length, stopSet.size());
-   for (String word : TEST_STOP_WORDS) {
-     assertTrue(stopSet.contains(word));
-   }
- }
-
- /**
-  * Looks a term up through the (char[], offset, length) form of contains()
-  * with a window that does not begin at index 0, on a regular set and on its
-  * unmodifiable view.
-  */
- public void testNonZeroOffset() {
-   final char[] padded = "xthisy".toCharArray();
-   // the window [1, 1 + 4) of the padded buffer spells "this"
-   final String windowed = new String(padded, 1, 4);
-
-   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-   set.addAll(Arrays.asList("Hello", "World", "this", "is", "a", "test"));
-   assertTrue(set.contains(padded, 1, 4));
-   assertTrue(set.contains(windowed));
-
-   // test unmodifiable
-   CharArraySet readOnly = CharArraySet.unmodifiableSet(set);
-   assertTrue(readOnly.contains(padded, 1, 4));
-   assertTrue(readOnly.contains(windowed));
- }
-
- /**
-  * Adds an Integer and checks that it is found via the same instance, a
-  * distinct equal instance, its String form and its char[] form, both on the
-  * set itself and on its unmodifiable view.
-  */
- public void testObjectContains() {
-   final Integer one = Integer.valueOf(1);
-   CharArraySet modifiable = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-   modifiable.add(one);
-   assertFindsOne(modifiable, one);
-   // test unmodifiable
-   assertFindsOne(CharArraySet.unmodifiableSet(modifiable), one);
- }
-
- /** Asserts that {@code set} reports the value 1 through each lookup form. */
- private void assertFindsOne(CharArraySet set, Integer sameInstance) {
-   assertTrue(set.contains(sameInstance));
-   assertTrue(set.contains(new Integer(1))); // another integer
-   assertTrue(set.contains("1"));
-   assertTrue(set.contains(new char[]{'1'}));
- }
-
- /**
-  * Fills a set, clears it, verifies it is empty, then refills it and verifies
-  * that all terms are present again.
-  */
- public void testClear() {
-   final List<String> words = Arrays.asList(TEST_STOP_WORDS);
-   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-
-   set.addAll(words);
-   assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
-
-   set.clear();
-   assertEquals("not empty", 0, set.size());
-   for (String word : TEST_STOP_WORDS) {
-     assertFalse(set.contains(word));
-   }
-
-   set.addAll(words);
-   assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
-   for (String word : TEST_STOP_WORDS) {
-     assertTrue(set.contains(word));
-   }
- }
-
- /**
-  * Checks that an unmodifiable view rejects every mutating call with an
-  * {@link UnsupportedOperationException} and that neither its size nor its
-  * content changes as a result.
-  */
- public void testModifyOnUnmodifiable() {
-   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-   set.addAll(Arrays.asList(TEST_STOP_WORDS));
-   final int size = set.size();
-   set = CharArraySet.unmodifiableSet(set);
-   assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
-   final String notInSet = "SirGallahad";
-   assertFalse("Test String already exists in set", set.contains(notInSet));
-
-   // add(char[])
-   try {
-     set.add(notInSet.toCharArray());
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // add(String)
-   try {
-     set.add(notInSet);
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // add(CharSequence)
-   try {
-     set.add(new StringBuilder(notInSet));
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // clear()
-   try {
-     set.clear();
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertFalse("Changed unmodifiable set", set.contains(notInSet));
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // add(Object)
-   try {
-     set.add((Object) notInSet);
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
-   // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
-   // remove() on the iterator
-   try {
-     set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // retainAll()
-   try {
-     set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(notInSet), true));
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // addAll()
-   try {
-     set.addAll(Arrays.asList(new String[]{notInSet}));
-     fail("Modified unmodifiable set");
-   } catch (UnsupportedOperationException e) {
-     // expected
-     assertFalse("Test String has been added to unmodifiable set", set.contains(notInSet));
-     // same size check as every other mutator above
-     assertEquals("Size of unmodifiable set has changed", size, set.size());
-   }
-
-   // after all rejected modifications the original content must still be there
-   for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
-     assertTrue(set.contains(TEST_STOP_WORDS[i]));
-   }
- }
-
- /**
-  * Checks that the unmodifiable view reports the same size and content as the
-  * set it wraps, and that wrapping {@code null} raises a
-  * {@link NullPointerException}.
-  */
- public void testUnmodifiableSet() {
-   CharArraySet source = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-   source.addAll(Arrays.asList(TEST_STOP_WORDS));
-   source.add(Integer.valueOf(1));
-   final int size = source.size();
-
-   CharArraySet readOnly = CharArraySet.unmodifiableSet(source);
-   assertEquals("Set size changed due to unmodifiableSet call", size, readOnly.size());
-   for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
-     assertTrue(readOnly.contains(TEST_STOP_WORDS[i]));
-   }
-   // the Integer entry must be reachable through all of its representations
-   assertTrue(readOnly.contains(Integer.valueOf(1)));
-   assertTrue(readOnly.contains("1"));
-   assertTrue(readOnly.contains(new char[]{'1'}));
-
-   try {
-     CharArraySet.unmodifiableSet(null);
-     fail("can not make null unmodifiable");
-   } catch (NullPointerException npe) {
-     // expected
-   }
- }
-
- /**
-  * Adds terms containing upper-case supplementary characters and checks the
-  * lookup of both the upper-case and the lower-case variants, first on a set
-  * constructed with the flag {@code true} and then on one constructed with
-  * {@code false}.
-  */
- public void testSupplementaryChars() {
-   final String missing = "Term %s is missing in the set";
-   final String falsePos = "Term %s is in the set but shouldn't";
-   // for reference see
-   // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
-   final String[] upperArr = {
-     "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"
-   };
-   final String[] lowerArr = {
-     "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"
-   };
-
-   // flag == true: both variants are expected to be found
-   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true);
-   for (int i = 0; i < upperArr.length; i++) {
-     set.add(upperArr[i]);
-   }
-   for (int i = 0; i < upperArr.length; i++) {
-     final String upper = upperArr[i];
-     final String lower = lowerArr[i];
-     assertTrue(String.format(missing, upper), set.contains(upper));
-     assertTrue(String.format(missing, lower), set.contains(lower));
-   }
-
-   // flag == false: only the exact (upper-case) variant is expected to be found
-   set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
-   for (int i = 0; i < upperArr.length; i++) {
-     set.add(upperArr[i]);
-   }
-   for (int i = 0; i < upperArr.length; i++) {
-     final String upper = upperArr[i];
-     final String lower = lowerArr[i];
-     assertTrue(String.format(missing, upper), set.contains(upper));
-     assertFalse(String.format(falsePos, lower), set.contains(lower));
-   }
- }
-
- /**
-  * Adds terms that contain an unpaired high surrogate and checks the lookup of
-  * the upper-case and lower-case variants, first on a set constructed with the
-  * flag {@code true} and then on one constructed with {@code false}.
-  */
- public void testSingleHighSurrogate() {
-   String missing = "Term %s is missing in the set";
-   String falsePos = "Term %s is in the set but shouldn't";
-   String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
-       "\uD800EfG", "\uD800\ud801\udc1cB" };
-
-   String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
-       "\uD800efg", "\uD800\ud801\udc44b" };
-   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays
-       .asList(TEST_STOP_WORDS), true);
-   for (String upper : upperArr) {
-     set.add(upper);
-   }
-   for (int i = 0; i < upperArr.length; i++) {
-     assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-     assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
-   }
-   set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
-       false);
-   for (String upper : upperArr) {
-     set.add(upper);
-   }
-   for (int i = 0; i < upperArr.length; i++) {
-     assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-     // report the term that is actually being looked up (the lower-case one)
-     assertFalse(String.format(falsePos, lowerArr[i]), set
-         .contains(lowerArr[i]));
-   }
- }
-
- /**
-  * Runs the supplementary-character lookups against sets created with
-  * {@code Version.LUCENE_30}; for both values of the constructor flag the
-  * lower-case variants are expected to be absent.
-  *
-  * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
-  * no longer needed.
-  */
- @Deprecated
- public void testSupplementaryCharsBWCompat() {
-   final String missing = "Term %s is missing in the set";
-   final String falsePos = "Term %s is in the set but shouldn't";
-   // for reference see
-   // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
-   final String[] upperArr = {
-     "Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"
-   };
-   final String[] lowerArr = {
-     "abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"
-   };
-
-   // the expectations are the same for both flag values, so run them in order: true, then false
-   for (boolean flag : new boolean[] {true, false}) {
-     CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), flag);
-     for (String upper : upperArr) {
-       set.add(upper);
-     }
-     for (int i = 0; i < upperArr.length; i++) {
-       assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-       assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
-     }
-   }
- }
-
- /**
- * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
- * no longer needed.
- */
- @Deprecated
- public void testSingleHighSurrogateBWComapt() {
- String missing = "Term %s is missing in the set";
- String falsePos = "Term %s is in the set but shouldn't";
- String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
- "\uD800EfG", "\uD800\ud801\udc1cB" };
-
- String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
- "\uD800efg", "\uD800\ud801\udc44b" };
- CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays
- .asList(TEST_STOP_WORDS), true);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
- if (i == lowerArr.length - 1)
- assertFalse(String.format(falsePos, lowerArr[i]), set
- .contains(lowerArr[i]));
- else
- assertTrue(String.format(missing, lowerArr[i]), set
- .contains(lowerArr[i]));
- }
- set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS),
- false);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
- assertFalse(String.format(falsePos, lowerArr[i]), set
- .contains(lowerArr[i]));
- }
- }
-
- /**
-  * Test the static single-argument #copy() function with a CharArraySet as a
-  * source: the copies must have the same size and lookup behaviour as their
-  * sources, and terms added to a copy must not show up in the sources.
-  */
- @SuppressWarnings("deprecation")
- public void testCopyCharArraySetBWCompat() {
-   CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-   CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);
-
-   List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
-   List<String> stopwordsUpper = new ArrayList<String>();
-   for (String string : stopwords) {
-     stopwordsUpper.add(string.toUpperCase());
-   }
-   setIgnoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
-   setIgnoreCase.add(Integer.valueOf(1));
-   setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
-   setCaseSensitive.add(Integer.valueOf(1));
-
-   // This should use the deprecated methods, because it checks a bw compatibility.
-   CharArraySet copy = CharArraySet.copy(setIgnoreCase);
-   CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
-   // each copy is compared with its own source
-   assertEquals(setIgnoreCase.size(), copy.size());
-   assertEquals(setCaseSensitive.size(), copyCaseSens.size());
-
-   assertTrue(copy.containsAll(stopwords));
-   assertTrue(copy.containsAll(stopwordsUpper));
-   assertTrue(copyCaseSens.containsAll(stopwords));
-   for (String string : stopwordsUpper) {
-     assertFalse(copyCaseSens.contains(string));
-   }
-   // test adding terms to the copy
-   List<String> newWords = new ArrayList<String>();
-   for (String string : stopwords) {
-     newWords.add(string + "_1");
-   }
-   copy.addAll(newWords);
-
-   assertTrue(copy.containsAll(stopwords));
-   assertTrue(copy.containsAll(stopwordsUpper));
-   assertTrue(copy.containsAll(newWords));
-   // new added terms are not in the source set
-   for (String string : newWords) {
-     assertFalse(setIgnoreCase.contains(string));
-     assertFalse(setCaseSensitive.contains(string));
-   }
- }
-
- /**
-  * Test the static #copy() function with a CharArraySet as a source: the
-  * copies must have the same size and lookup behaviour as their sources, and
-  * terms added to a copy must not show up in the sources.
-  */
- public void testCopyCharArraySet() {
-   CharArraySet setIgnoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
-   CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);
-
-   List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
-   List<String> stopwordsUpper = new ArrayList<String>();
-   for (String string : stopwords) {
-     stopwordsUpper.add(string.toUpperCase());
-   }
-   setIgnoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
-   setIgnoreCase.add(Integer.valueOf(1));
-   setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
-   setCaseSensitive.add(Integer.valueOf(1));
-
-   CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIgnoreCase);
-   CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);
-
-   // each copy is compared with its own source
-   assertEquals(setIgnoreCase.size(), copy.size());
-   assertEquals(setCaseSensitive.size(), copyCaseSens.size());
-
-   assertTrue(copy.containsAll(stopwords));
-   assertTrue(copy.containsAll(stopwordsUpper));
-   assertTrue(copyCaseSens.containsAll(stopwords));
-   for (String string : stopwordsUpper) {
-     assertFalse(copyCaseSens.contains(string));
-   }
-   // test adding terms to the copy
-   List<String> newWords = new ArrayList<String>();
-   for (String string : stopwords) {
-     newWords.add(string + "_1");
-   }
-   copy.addAll(newWords);
-
-   assertTrue(copy.containsAll(stopwords));
-   assertTrue(copy.containsAll(stopwordsUpper));
-   assertTrue(copy.containsAll(newWords));
-   // new added terms are not in the source set
-   for (String string : newWords) {
-     assertFalse(setIgnoreCase.contains(string));
-     assertFalse(setCaseSensitive.contains(string));
-   }
- }
-
- /**
-  * Test the static #copy() function with a JDK {@link Set} as a source: the
-  * copy must have the same size, find the original terms but not their
-  * upper-case forms, and terms added to the copy must not show up in the
-  * source set.
-  */
- public void testCopyJDKSet() {
-   Set<String> set = new HashSet<String>();
-
-   List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
-   List<String> stopwordsUpper = new ArrayList<String>();
-   for (String string : stopwords) {
-     stopwordsUpper.add(string.toUpperCase());
-   }
-   set.addAll(Arrays.asList(TEST_STOP_WORDS));
-
-   CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);
-
-   // a single size check is sufficient; the original repeated it verbatim
-   assertEquals(set.size(), copy.size());
-
-   assertTrue(copy.containsAll(stopwords));
-   for (String string : stopwordsUpper) {
-     assertFalse(copy.contains(string));
-   }
-
-   List<String> newWords = new ArrayList<String>();
-   for (String string : stopwords) {
-     newWords.add(string + "_1");
-   }
-   copy.addAll(newWords);
-
-   assertTrue(copy.containsAll(stopwords));
-   assertTrue(copy.containsAll(newWords));
-   // new added terms are not in the source set
-   for (String string : newWords) {
-     assertFalse(set.contains(string));
-   }
- }
-
- /**
- * Tests a special case of {@link CharArraySet#copy(Version, Set)} where the
- * set to copy is the {@link CharArraySet#EMPTY_SET}
- */
- public void testCopyEmptySet() {
- assertSame(CharArraySet.EMPTY_SET,
- CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET));
- }
-
- /**
-  * Smoke test for the static empty set: it reports size 0, is empty, and
-  * answers {@code false} for every form of contains().
-  */
- public void testEmptySet() {
-   assertEquals(0, CharArraySet.EMPTY_SET.size());
-
-   assertTrue(CharArraySet.EMPTY_SET.isEmpty());
-   for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
-     final String stopword = TEST_STOP_WORDS[i];
-     assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
-   }
-   // the same term through each lookup form: String, Object, char[], char[] window
-   assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
-   assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
-   assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
-   assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
- }
-
- /**
-  * Checks that each contains() variant raises a {@link NullPointerException}
-  * when it is handed {@code null}.
-  */
- public void testContainsWithNull() {
-   CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
-
-   // char[] / offset / length variant
-   try {
-     set.contains((char[]) null, 0, 10);
-     fail("null value must raise NPE");
-   } catch (NullPointerException expected) {
-     // expected
-   }
-
-   // CharSequence variant
-   try {
-     set.contains((CharSequence) null);
-     fail("null value must raise NPE");
-   } catch (NullPointerException expected) {
-     // expected
-   }
-
-   // Object variant
-   try {
-     set.contains((Object) null);
-     fail("null value must raise NPE");
-   } catch (NullPointerException expected) {
-     // expected
-   }
- }
-
- /**
-  * Checks which iterator type iterator() hands out for a 3.0 set and for a
-  * current-version set, and that the iterator returned by stringIterator()
-  * yields the source terms and refuses remove().
-  */
- @Deprecated @SuppressWarnings("unchecked")
- public void testIterator() {
-   HashSet<String> expected = new HashSet<String>();
-   expected.addAll(Arrays.asList(TEST_STOP_WORDS));
-
-   // raw Iterator on purpose: only the runtime class is of interest here
-   Iterator legacyIterator = (Iterator) CharArraySet.copy(Version.LUCENE_30, expected).iterator();
-   assertTrue("in 3.0 version, iterator should be CharArraySetIterator",
-       legacyIterator instanceof CharArraySet.CharArraySetIterator);
-
-   CharArraySet current = CharArraySet.copy(TEST_VERSION_CURRENT, expected);
-   Iterator currentIterator = (Iterator) current.iterator();
-   assertFalse("in current version, iterator should not be CharArraySetIterator",
-       currentIterator instanceof CharArraySet.CharArraySetIterator);
-
-   Iterator<String> strings = current.stringIterator();
-   assertTrue(strings instanceof CharArraySet.CharArraySetIterator);
-   while (strings.hasNext()) {
-     // as the set returns String instances, this must work:
-     final String term = strings.next();
-     assertTrue(expected.contains(term));
-     try {
-       strings.remove();
-       fail("remove() should not work on CharArraySetIterator");
-     } catch (UnsupportedOperationException uoe) {
-       // pass
-     }
-   }
- }
-
- /**
-  * Checks the toString() output of a one-element set and that a second
-  * element is separated by ", ", for the current version and for
-  * {@code Version.LUCENE_30}.
-  */
- public void testToString() {
-   final Version[] versions = {TEST_VERSION_CURRENT, Version.LUCENE_30};
-   for (Version version : versions) {
-     CharArraySet set = CharArraySet.copy(version, Collections.singleton("test"));
-     assertEquals("[test]", set.toString());
-     set.add("test2");
-     assertTrue(set.toString().contains(", "));
-   }
- }
-}