pylucene 3.5.0-3
[pylucene.git] / lucene-java-3.4.0 / lucene / contrib / queries / src / test / org / apache / lucene / search / DuplicateFilterTest.java
diff --git a/lucene-java-3.4.0/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java b/lucene-java-3.4.0/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
deleted file mode 100644 (file)
index fe1699a..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.HashSet;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-
-public class DuplicateFilterTest extends LuceneTestCase {
-       private static final String KEY_FIELD = "url";
-       private Directory directory;
-       private IndexReader reader;
-       TermQuery tq=new TermQuery(new Term("text","lucene"));
-       private IndexSearcher searcher;
-
-       @Override
-       public void setUp() throws Exception {
-    super.setUp();
-               directory = newDirectory();
-               RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
-               
-               //Add series of docs with filterable fields : url, text and dates  flags
-               addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
-               addDoc(writer, "http://lucene.apache.org", "New release pending", "20040102");
-               addDoc(writer, "http://lucene.apache.org", "Lucene 1.9 out now", "20050101");           
-               addDoc(writer, "http://www.bar.com", "Local man bites dog", "20040101");
-               addDoc(writer, "http://www.bar.com", "Dog bites local man", "20040102");
-               addDoc(writer, "http://www.bar.com", "Dog uses Lucene", "20050101");
-               addDoc(writer, "http://lucene.apache.org", "Lucene 2.0 out", "20050101");
-               addDoc(writer, "http://lucene.apache.org", "Oops. Lucene 2.1 out", "20050102");
-
-                // Until we fix LUCENE-2348, the index must
-                // have only 1 segment:
-                writer.optimize();
-
-               reader = writer.getReader();
-               writer.close();                 
-               searcher =newSearcher(reader);
-               
-       }
-       
-       @Override
-       public void tearDown() throws Exception {
-               reader.close();
-               searcher.close();
-               directory.close();
-               super.tearDown();
-       }
-
-       private void addDoc(RandomIndexWriter writer, String url, String text, String date) throws IOException
-       {
-               Document doc=new Document();
-               doc.add(newField(KEY_FIELD,url,Field.Store.YES,Field.Index.NOT_ANALYZED));
-               doc.add(newField("text",text,Field.Store.YES,Field.Index.ANALYZED));
-               doc.add(newField("date",date,Field.Store.YES,Field.Index.ANALYZED));
-               writer.addDocument(doc);
-       }
-               
-       public void testDefaultFilter() throws Throwable
-       {
-               DuplicateFilter df=new DuplicateFilter(KEY_FIELD);              
-               HashSet<String> results=new HashSet<String>();
-               ScoreDoc[] hits = searcher.search(tq,df, 1000).scoreDocs;
-               for(int i=0;i<hits.length;i++)
-               {
-                       Document d=searcher.doc(hits[i].doc);
-                       String url=d.get(KEY_FIELD);
-                       assertFalse("No duplicate urls should be returned",results.contains(url));
-                       results.add(url);
-               }
-       }
-       public void testNoFilter() throws Throwable
-       {
-               HashSet<String> results=new HashSet<String>();
-               ScoreDoc[] hits = searcher.search(tq, null, 1000).scoreDocs;
-               assertTrue("Default searching should have found some matches",hits.length>0);
-               boolean dupsFound=false;
-               for(int i=0;i<hits.length;i++)
-               {
-                       Document d=searcher.doc(hits[i].doc);
-                       String url=d.get(KEY_FIELD);
-                       if(!dupsFound)
-                               dupsFound=results.contains(url);
-                       results.add(url);
-               }
-               assertTrue("Default searching should have found duplicate urls",dupsFound);
-       }
-       
-       public void testFastFilter() throws Throwable
-       {
-               DuplicateFilter df=new DuplicateFilter(KEY_FIELD);
-               df.setProcessingMode(DuplicateFilter.PM_FAST_INVALIDATION);
-               HashSet<String> results=new HashSet<String>();
-               ScoreDoc[] hits = searcher.search(tq,df, 1000).scoreDocs;
-               assertTrue("Filtered searching should have found some matches",hits.length>0);
-               for(int i=0;i<hits.length;i++)
-               {
-                       Document d=searcher.doc(hits[i].doc);
-                       String url=d.get(KEY_FIELD);
-                       assertFalse("No duplicate urls should be returned",results.contains(url));
-                       results.add(url);
-               }
-               assertEquals("Two urls found",2, results.size());
-       }       
-       public void testKeepsLastFilter() throws Throwable
-       {
-               DuplicateFilter df=new DuplicateFilter(KEY_FIELD);
-               df.setKeepMode(DuplicateFilter.KM_USE_LAST_OCCURRENCE);
-               ScoreDoc[] hits = searcher.search(tq,df, 1000).scoreDocs;
-               assertTrue("Filtered searching should have found some matches",hits.length>0);
-               for(int i=0;i<hits.length;i++)
-               {
-                       Document d=searcher.doc(hits[i].doc);
-                       String url=d.get(KEY_FIELD);
-                       TermDocs td = reader.termDocs(new Term(KEY_FIELD,url));
-                       int lastDoc=0;
-                       while(td.next())
-                       {
-                               lastDoc=td.doc();
-                       }
-                       assertEquals("Duplicate urls should return last doc",lastDoc, hits[i].doc);
-               }
-       }       
-       
-       
-       public void testKeepsFirstFilter() throws Throwable
-       {
-               DuplicateFilter df=new DuplicateFilter(KEY_FIELD);
-               df.setKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);
-               ScoreDoc[] hits = searcher.search(tq,df, 1000).scoreDocs;
-               assertTrue("Filtered searching should have found some matches",hits.length>0);
-               for(int i=0;i<hits.length;i++)
-               {
-                       Document d=searcher.doc(hits[i].doc);
-                       String url=d.get(KEY_FIELD);
-                       TermDocs td = reader.termDocs(new Term(KEY_FIELD,url));
-                       int lastDoc=0;
-                       td.next();
-                       lastDoc=td.doc();
-                       assertEquals("Duplicate urls should return first doc",lastDoc, hits[i].doc);
-               }
-       }       
-       
-       
-}