package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.Iterator;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;


public class TestCharArraySet extends LuceneTestCase {
  
  static final String[] TEST_STOP_WORDS = {
    "a", "an", "and", "are", "as", "at", "be", "but", "by",
    "for", "if", "in", "into", "is", "it",
    "no", "not", "of", "on", "or", "such",
    "that", "the", "their", "then", "there", "these",
    "they", "this", "to", "was", "will", "with"
  };
  
  
  public void testRehash() throws Exception {
    CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 0, true);
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      cas.add(TEST_STOP_WORDS[i]);
    assertEquals(TEST_STOP_WORDS.length, cas.size());
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      assertTrue(cas.contains(TEST_STOP_WORDS[i]));
  }

  public void testNonZeroOffset() {
    String[] words={"Hello","World","this","is","a","test"};
    char[] findme="xthisy".toCharArray();   
    CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true);
    set.addAll(Arrays.asList(words));
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme,1,4)));
    
    // test unmodifiable
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(findme, 1, 4));
    assertTrue(set.contains(new String(findme,1,4)));
  }
  
  public void testObjectContains() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    Integer val = Integer.valueOf(1);
    set.add(val);
    assertTrue(set.contains(val));
    assertTrue(set.contains(new Integer(1))); // another integer
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[]{'1'}));
    // test unmodifiable
    set = CharArraySet.unmodifiableSet(set);
    assertTrue(set.contains(val));
    assertTrue(set.contains(new Integer(1))); // another integer
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[]{'1'}));
  }
  
  public void testClear(){
    CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10,true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    set.clear();
    assertEquals("not empty", 0, set.size());
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      assertFalse(set.contains(TEST_STOP_WORDS[i]));
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
    for(int i=0;i<TEST_STOP_WORDS.length;i++)
      assertTrue(set.contains(TEST_STOP_WORDS[i]));
  }
  
  public void testModifyOnUnmodifiable(){
    CharArraySet set=new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
    String NOT_IN_SET = "SirGallahad";
    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
    
    try{
      set.add(NOT_IN_SET.toCharArray());  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.add(NOT_IN_SET);  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.add(new StringBuilder(NOT_IN_SET));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.clear();  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    try{
      set.add((Object) NOT_IN_SET);  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
    // remove() on the iterator
    try{
      set.removeAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.retainAll(new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(NOT_IN_SET), true));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertEquals("Size of unmodifiable set has changed", size, set.size());
    }
    
    try{
      set.addAll(Arrays.asList(new String[]{NOT_IN_SET}));  
      fail("Modified unmodifiable set");
    }catch (UnsupportedOperationException e) {
      // expected
      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
    }
    
    for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
      assertTrue(set.contains(TEST_STOP_WORDS[i]));  
    }
  }
  
  public void testUnmodifiableSet(){
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 10,true);
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
    set.add(Integer.valueOf(1));
    final int size = set.size();
    set = CharArraySet.unmodifiableSet(set);
    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
    for (String stopword : TEST_STOP_WORDS) {
      assertTrue(set.contains(stopword));
    }
    assertTrue(set.contains(Integer.valueOf(1)));
    assertTrue(set.contains("1"));
    assertTrue(set.contains(new char[]{'1'}));
    
    try{
      CharArraySet.unmodifiableSet(null);
      fail("can not make null unmodifiable");
    }catch (NullPointerException e) {
      // expected
    }
  }
  
  public void testSupplementaryChars() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    String[] upperArr = new String[] {"Abc\ud801\udc1c",
        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
    String[] lowerArr = new String[] {"abc\ud801\udc44",
        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
  }
  
  public void testSingleHighSurrogate() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
        "\uD800EfG", "\uD800\ud801\udc1cB" };

    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
        "\uD800efg", "\uD800\ud801\udc44b" };
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays
        .asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
        false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, upperArr[i]), set
          .contains(lowerArr[i]));
    }
  }
  
  /**
   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
   *             no longer needed.
   */
  @Deprecated
  public void testSupplementaryCharsBWCompat() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    // for reference see
    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
    String[] upperArr = new String[] {"Abc\ud801\udc1c",
        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
    String[] lowerArr = new String[] {"abc\ud801\udc44",
        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
  }

  /**
   * @deprecated remove this test when lucene 3.0 "broken unicode 4" support is
   *             no longer needed.
   */
  @Deprecated
  public void testSingleHighSurrogateBWComapt() {
    String missing = "Term %s is missing in the set";
    String falsePos = "Term %s is in the set but shouldn't";
    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
        "\uD800EfG", "\uD800\ud801\udc1cB" };

    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
        "\uD800efg", "\uD800\ud801\udc44b" };
    CharArraySet set = new CharArraySet(Version.LUCENE_30, Arrays
        .asList(TEST_STOP_WORDS), true);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      if (i == lowerArr.length - 1)
        assertFalse(String.format(falsePos, lowerArr[i]), set
            .contains(lowerArr[i]));
      else
        assertTrue(String.format(missing, lowerArr[i]), set
            .contains(lowerArr[i]));
    }
    set = new CharArraySet(Version.LUCENE_30, Arrays.asList(TEST_STOP_WORDS),
        false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
      assertFalse(String.format(falsePos, lowerArr[i]), set
          .contains(lowerArr[i]));
    }
  }
  
  @SuppressWarnings("deprecated")
  public void testCopyCharArraySetBWCompat() {
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<String>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase());
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
    setCaseSensitive.add(Integer.valueOf(1));

    // This should use the deprecated methods, because it checks a bw compatibility.
    CharArraySet copy = CharArraySet.copy(setIngoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);

    assertEquals(setIngoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copyCaseSens.contains(string));
    }
    // test adding terms to the copy
    List<String> newWords = new ArrayList<String>();
    for (String string : stopwords) {
      newWords.add(string+"_1");
    }
    copy.addAll(newWords);
    
    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(setIngoreCase.contains(string));  
      assertFalse(setCaseSensitive.contains(string));  

    }
  }
  
  /**
   * Test the static #copy() function with a CharArraySet as a source
   */
  public void testCopyCharArraySet() {
    CharArraySet setIngoreCase = new CharArraySet(TEST_VERSION_CURRENT, 10, true);
    CharArraySet setCaseSensitive = new CharArraySet(TEST_VERSION_CURRENT, 10, false);

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<String>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase());
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
    setCaseSensitive.add(Integer.valueOf(1));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, setIngoreCase);
    CharArraySet copyCaseSens = CharArraySet.copy(TEST_VERSION_CURRENT, setCaseSensitive);

    assertEquals(setIngoreCase.size(), copy.size());
    assertEquals(setCaseSensitive.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copyCaseSens.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copyCaseSens.contains(string));
    }
    // test adding terms to the copy
    List<String> newWords = new ArrayList<String>();
    for (String string : stopwords) {
      newWords.add(string+"_1");
    }
    copy.addAll(newWords);
    
    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(stopwordsUpper));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(setIngoreCase.contains(string));  
      assertFalse(setCaseSensitive.contains(string));  

    }
  }
  
  /**
   * Test the static #copy() function with a JDK {@link Set} as a source
   */
  public void testCopyJDKSet() {
    Set<String> set = new HashSet<String>();

    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<String>();
    for (String string : stopwords) {
      stopwordsUpper.add(string.toUpperCase());
    }
    set.addAll(Arrays.asList(TEST_STOP_WORDS));

    CharArraySet copy = CharArraySet.copy(TEST_VERSION_CURRENT, set);

    assertEquals(set.size(), copy.size());
    assertEquals(set.size(), copy.size());

    assertTrue(copy.containsAll(stopwords));
    for (String string : stopwordsUpper) {
      assertFalse(copy.contains(string));
    }
    
    List<String> newWords = new ArrayList<String>();
    for (String string : stopwords) {
      newWords.add(string+"_1");
    }
    copy.addAll(newWords);
    
    assertTrue(copy.containsAll(stopwords));
    assertTrue(copy.containsAll(newWords));
    // new added terms are not in the source set
    for (String string : newWords) {
      assertFalse(set.contains(string));  
    }
  }
  
  /**
   * Tests a special case of {@link CharArraySet#copy(Version, Set)} where the
   * set to copy is the {@link CharArraySet#EMPTY_SET}
   */
  public void testCopyEmptySet() {
    assertSame(CharArraySet.EMPTY_SET, 
        CharArraySet.copy(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET));
  }

  /**
   * Smoketests the static empty set
   */
  public void testEmptySet() {
    assertEquals(0, CharArraySet.EMPTY_SET.size());
    
    assertTrue(CharArraySet.EMPTY_SET.isEmpty());
    for (String stopword : TEST_STOP_WORDS) {
      assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
    }
    assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
    assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
    assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
    assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
  }
  
  /**
   * Test for NPE
   */
  public void testContainsWithNull() {
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
    try {
      set.contains((char[]) null, 0, 10);
      fail("null value must raise NPE");
    } catch (NullPointerException e) {}
    try {
      set.contains((CharSequence) null);
      fail("null value must raise NPE");
    } catch (NullPointerException e) {}
    try {
      set.contains((Object) null);
      fail("null value must raise NPE");
    } catch (NullPointerException e) {}
  }
  
  @Deprecated @SuppressWarnings("unchecked")
  public void testIterator() {
    HashSet<String> hset = new HashSet<String>();
    hset.addAll(Arrays.asList(TEST_STOP_WORDS));

    assertTrue("in 3.0 version, iterator should be CharArraySetIterator",
      ((Iterator) CharArraySet.copy(Version.LUCENE_30, hset).iterator()) instanceof CharArraySet.CharArraySetIterator);

    CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, hset);
    assertFalse("in current version, iterator should not be CharArraySetIterator",
      ((Iterator) set.iterator()) instanceof CharArraySet.CharArraySetIterator);
    
    Iterator<String> it = set.stringIterator();
    assertTrue(it instanceof CharArraySet.CharArraySetIterator);
    while (it.hasNext()) {
      // as the set returns String instances, this must work:
      assertTrue(hset.contains(it.next()));
      try {
        it.remove();
        fail("remove() should not work on CharArraySetIterator");
      } catch (UnsupportedOperationException uoe) {
        // pass
      }
    }
  }
  
  public void testToString() {
    CharArraySet set = CharArraySet.copy(TEST_VERSION_CURRENT, Collections.singleton("test"));
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));
    
    set = CharArraySet.copy(Version.LUCENE_30, Collections.singleton("test"));
    assertEquals("[test]", set.toString());
    set.add("test2");
    assertTrue(set.toString().contains(", "));
  }
}
