X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java?ds=sidebyside

diff --git a/lucene-java-3.4.0/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java b/lucene-java-3.4.0/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java
deleted file mode 100644
index 957ab20..0000000
--- a/lucene-java-3.4.0/lucene/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java
+++ /dev/null
@@ -1,228 +0,0 @@
-package org.apache.lucene.search;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.util.FixedBitSet;
-
-public class DuplicateFilter extends Filter
-{
-	
-	String fieldName;
-	
-	/**
-	 * KeepMode determines which document id to consider as the master, all others being 
-	 * identified as duplicates. Selecting the "first occurrence" can potentially save on IO.
-	 */
-	int keepMode=KM_USE_FIRST_OCCURRENCE;
-	public static final int KM_USE_FIRST_OCCURRENCE=1;
-	public static final int KM_USE_LAST_OCCURRENCE=2;
-	
-	/**
-	 * "Full" processing mode starts by setting all bits to false and only setting bits
-	 * for documents that contain the given field and are identified as none-duplicates. 
-
-	 * "Fast" processing sets all bits to true then unsets all duplicate docs found for the
-	 * given field. This approach avoids the need to read TermDocs for terms that are seen 
-	 * to have a document frequency of exactly "1" (i.e. no duplicates). While a potentially 
-	 * faster approach , the downside is that bitsets produced will include bits set for 
-	 * documents that do not actually contain the field given.
-	 * 
-	 */
-	int processingMode=PM_FULL_VALIDATION;
-	public static final int PM_FULL_VALIDATION=1;
-	public static final int PM_FAST_INVALIDATION=2;
-	
-
-	
-	public DuplicateFilter(String fieldName)
-	{
-		this(fieldName, KM_USE_LAST_OCCURRENCE,PM_FULL_VALIDATION);
-	}
-	
-
-	public DuplicateFilter(String fieldName, int keepMode, int processingMode)
-	{
-		this.fieldName = fieldName;
-		this.keepMode = keepMode;
-		this.processingMode = processingMode;
-	}
-
-  @Override
-  public DocIdSet getDocIdSet(IndexReader reader) throws IOException
-	{
-		if(processingMode==PM_FAST_INVALIDATION)
-		{
-			return fastBits(reader);
-		}
-		else
-		{
-			return correctBits(reader);
-		}
-	}
-	
-  private FixedBitSet correctBits(IndexReader reader) throws IOException
-	{
-		
-    FixedBitSet bits=new FixedBitSet(reader.maxDoc()); //assume all are INvalid
-		Term startTerm=new Term(fieldName);
-		TermEnum te = reader.terms(startTerm);
-		if(te!=null)
-		{
-			Term currTerm=te.term();
-			while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
-			{
-				int lastDoc=-1;
-				//set non duplicates
-				TermDocs td = reader.termDocs(currTerm);
-				if(td.next())
-				{
-					if(keepMode==KM_USE_FIRST_OCCURRENCE)
-					{
-						bits.set(td.doc());
-					}
-					else
-					{
-						do
-						{
-							lastDoc=td.doc();
-						}while(td.next());
-						bits.set(lastDoc);
-					}
-				}
-				if(!te.next())
-				{
-					break;
-				}
-				currTerm=te.term();
-			}
-		}
-		return bits;
-	}
-	
-  private FixedBitSet fastBits(IndexReader reader) throws IOException
-	{
-		
-    FixedBitSet bits=new FixedBitSet(reader.maxDoc());
-		bits.set(0,reader.maxDoc()); //assume all are valid
-		Term startTerm=new Term(fieldName);
-		TermEnum te = reader.terms(startTerm);
-		if(te!=null)
-		{
-			Term currTerm=te.term();
-			
-			while((currTerm!=null)&&(currTerm.field()==startTerm.field())) //term fieldnames are interned
-			{
-				if(te.docFreq()>1)
-				{
-					int lastDoc=-1;
-					//unset potential duplicates
-					TermDocs td = reader.termDocs(currTerm);
-					td.next();
-					if(keepMode==KM_USE_FIRST_OCCURRENCE)
-					{
-						td.next();
-					}
-					do
-					{
-						lastDoc=td.doc();
-            bits.clear(lastDoc);
-					}while(td.next());
-					if(keepMode==KM_USE_LAST_OCCURRENCE)
-					{
-						//restore the last bit
-						bits.set(lastDoc);
-					}					
-				}
-				if(!te.next())
-				{
-					break;
-				}
-				currTerm=te.term();
-			}
-		}
-		return bits;
-	}
-
-	public String getFieldName()
-	{
-		return fieldName;
-	}
-
-
-	public void setFieldName(String fieldName)
-	{
-		this.fieldName = fieldName;
-	}
-
-
-	public int getKeepMode()
-	{
-		return keepMode;
-	}
-
-
-	public void setKeepMode(int keepMode)
-	{
-		this.keepMode = keepMode;
-	}
-
-
-	@Override
-	public boolean equals(Object obj)
-	{
-		if(this == obj)
-			return true;
-		if((obj == null) || (obj.getClass() != this.getClass()))
-			return false;
-		DuplicateFilter other = (DuplicateFilter)obj;
-		return keepMode == other.keepMode &&
-		processingMode == other.processingMode &&
-			(fieldName == other.fieldName || (fieldName != null && fieldName.equals(other.fieldName)));
-	}
-
-
-
-	@Override
-	public int hashCode()
-	{
-		int hash = 217;
-		hash = 31 * hash + keepMode;
-		hash = 31 * hash + processingMode;
-		hash = 31 * hash + fieldName.hashCode();
-		return hash;	
-	}
-
-
-	public int getProcessingMode()
-	{
-		return processingMode;
-	}
-
-
-	public void setProcessingMode(int processingMode)
-	{
-		this.processingMode = processingMode;
-	}
-	
-	
-
-}