lucene-java-3.4.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java

   1 package org.apache.lucene.facet.util;
   2
   3 import java.io.IOException;
   4 import java.util.Random;
   5
   6 import org.apache.lucene.analysis.MockAnalyzer;
   7 import org.apache.lucene.analysis.MockTokenizer;
   8 import org.apache.lucene.document.Document;
   9 import org.apache.lucene.document.Field;
  10 import org.apache.lucene.document.Field.Index;
  11 import org.apache.lucene.document.Field.Store;
  12 import org.apache.lucene.index.IndexReader;
  13 import org.apache.lucene.index.RandomIndexWriter;
  14 import org.apache.lucene.index.Term;
  15 import org.apache.lucene.search.DocIdSet;
  16 import org.apache.lucene.search.DocIdSetIterator;
  17 import org.apache.lucene.search.IndexSearcher;
  18 import org.apache.lucene.search.Query;
  19 import org.apache.lucene.search.TermQuery;
  20 import org.apache.lucene.store.Directory;
  21 import org.apache.lucene.util.OpenBitSet;
  22 import org.apache.lucene.util.OpenBitSetDISI;
  23 import org.junit.Test;
  24
  25 import org.apache.lucene.util.LuceneTestCase;
  26 import org.apache.lucene.facet.search.ScoredDocIDs;
  27 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
  28 import org.apache.lucene.facet.search.ScoredDocIdCollector;
  29
  30 /**
  31  * Licensed to the Apache Software Foundation (ASF) under one or more
  32  * contributor license agreements.  See the NOTICE file distributed with
  33  * this work for additional information regarding copyright ownership.
  34  * The ASF licenses this file to You under the Apache License, Version 2.0
  35  * (the "License"); you may not use this file except in compliance with
  36  * the License.  You may obtain a copy of the License at
  37  *
  38  *     http://www.apache.org/licenses/LICENSE-2.0
  39  *
  40  * Unless required by applicable law or agreed to in writing, software
  41  * distributed under the License is distributed on an "AS IS" BASIS,
  42  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  43  * See the License for the specific language governing permissions and
  44  * limitations under the License.
  45  */
  46
  47 public class TestScoredDocIDsUtils extends LuceneTestCase {
  48
  49   @Test
  50   public void testComplementIterator() throws Exception {
  51     final int n = atLeast(10000);
  52     final OpenBitSet bits = new OpenBitSet(n);
  53     for (int i = 0; i < 5 * n; i++) {
  54       bits.flip(random.nextInt(n));
  55     }
  56
  57     OpenBitSet verify = new OpenBitSet(n);
  58     verify.or(bits);
  59
  60     ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n);
  61
  62     Directory dir = newDirectory();
  63     IndexReader reader = createReaderWithNDocs(random, n, dir);
  64     try {
  65       assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs,
  66         reader).size());
  67     } finally {
  68       reader.close();
  69       dir.close();
  70     }
  71   }
  72
  73   @Test
  74   public void testAllDocs() throws Exception {
  75     int maxDoc = 3;
  76     Directory dir = newDirectory();
  77     IndexReader reader = createReaderWithNDocs(random, maxDoc, dir);
  78     try {
  79       ScoredDocIDs all = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);
  80       assertEquals("invalid size", maxDoc, all.size());
  81       ScoredDocIDsIterator iter = all.iterator();
  82       int doc = 0;
  83       while (iter.next()) {
  84         assertEquals("invalid doc ID: " + iter.getDocID(), doc++, iter.getDocID());
  85         assertEquals("invalid score: " + iter.getScore(), ScoredDocIDsIterator.DEFAULT_SCORE, iter.getScore(), 0.0f);
  86       }
  87       assertEquals("invalid maxDoc: " + doc, maxDoc, doc);
  88
  89       DocIdSet docIDs = all.getDocIDs();
  90       assertTrue("should be cacheable", docIDs.isCacheable());
  91       DocIdSetIterator docIDsIter = docIDs.iterator();
  92       assertEquals("nextDoc() hasn't been called yet", -1, docIDsIter.docID());
  93       assertEquals(0, docIDsIter.nextDoc());
  94       assertEquals(1, docIDsIter.advance(1));
  95       // if advance is smaller than current doc, advance to cur+1.
  96       assertEquals(2, docIDsIter.advance(0));
  97     } finally {
  98       reader.close();
  99       dir.close();
 100     }
 101   }
 102
 103   @Test
 104   public void testWithDeletions() throws Exception {
 105     int N_DOCS = 100;
 106
 107     DocumentFactory docFactory = new DocumentFactory(N_DOCS) {
 108       @Override
 109       public boolean markedDeleted(int docNum) {
 110         return (docNum % 3 == 0 ||        // every 3rd documents, including first
 111             docNum == numDocs - 1 ||     // last document
 112             docNum == numDocs / 2 ||     // 3 consecutive documents in the middle
 113             docNum == 1 + numDocs / 2 ||
 114             docNum == 2 + numDocs / 2);
 115       }
 116
 117       // every 6th document (starting from the 2nd) would contain 'alpha'
 118       @Override
 119       public boolean haveAlpha(int docNum) {
 120         return (docNum % 6 == 1);
 121       }
 122     };
 123
 124     Directory dir = newDirectory();
 125     IndexReader reader = createReaderWithNDocs(random, N_DOCS, docFactory, dir);
 126     try {
 127       ScoredDocIDs allDocs = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);
 128       ScoredDocIDsIterator it = allDocs.iterator();
 129       int numIteratedDocs = 0;
 130       while (it.next()) {
 131         numIteratedDocs++;
 132         int docNum = it.getDocID();
 133         assertNull(
 134             "Deleted docs must not appear in the allDocsScoredDocIds set: " + docNum,
 135             reader.document(docNum).getFieldable("del"));
 136       }
 137
 138       assertEquals("Wrong number of (live) documents", allDocs.size(), numIteratedDocs);
 139
 140       // Get all 'alpha' documents
 141       ScoredDocIdCollector collector = ScoredDocIdCollector.create(reader.maxDoc(), false);
 142       Query q = new TermQuery(new Term(DocumentFactory.field, DocumentFactory.alphaTxt));
 143       IndexSearcher searcher = newSearcher(reader);
 144       searcher.search(q, collector);
 145       searcher.close();
 146
 147       ScoredDocIDs scoredDocIds = collector.getScoredDocIDs();
 148       OpenBitSet resultSet = new OpenBitSetDISI(scoredDocIds.getDocIDs().iterator(), reader.maxDoc());
 149
 150       // Getting the complement set of the query result
 151       ScoredDocIDs complementSet = ScoredDocIdsUtils.getComplementSet(scoredDocIds, reader);
 152
 153       assertEquals("Number of documents in complement set mismatch",
 154           reader.numDocs() - scoredDocIds.size(), complementSet.size());
 155
 156       // now make sure the documents in the complement set are not deleted
 157       // and not in the original result set
 158       ScoredDocIDsIterator compIterator = complementSet.iterator();
 159       while (compIterator.next()) {
 160         int docNum = compIterator.getDocID();
 161         assertFalse(
 162             "Complement-Set must not contain deleted documents (doc="+docNum+")",
 163             reader.isDeleted(docNum));
 164         assertNull(
 165             "Complement-Set must not contain docs from the original set (doc="+ docNum+")",
 166             reader.document(docNum).getFieldable("del"));
 167         assertFalse(
 168             "Complement-Set must not contain docs from the original set (doc="+docNum+")",
 169             resultSet.fastGet(docNum));
 170       }
 171     } finally {
 172       reader.close();
 173       dir.close();
 174     }
 175   }
 176
 177   /**
 178    * Creates an index with n documents, this method is meant for testing purposes ONLY
 179    */
 180   static IndexReader createReaderWithNDocs(Random random, int nDocs, Directory directory) throws IOException {
 181     return createReaderWithNDocs(random, nDocs, new DocumentFactory(nDocs), directory);
 182   }
 183
 184   private static class DocumentFactory {
 185     protected final static String field = "content";
 186     protected final static String delTxt = "delete";
 187     protected final static String alphaTxt = "alpha";
 188
 189     private final static Field deletionMark = new Field(field, delTxt, Store.NO, Index.NOT_ANALYZED_NO_NORMS);
 190     private final static Field alphaContent = new Field(field, alphaTxt, Store.NO, Index.NOT_ANALYZED_NO_NORMS);
 191
 192     protected final int numDocs;
 193
 194     public DocumentFactory(int totalNumDocs) {
 195       this.numDocs = totalNumDocs;
 196     }
 197
 198     public boolean markedDeleted(int docNum) {
 199       return false;
 200     }
 201
 202     public Document getDoc(int docNum) {
 203       Document doc = new Document();
 204       if (markedDeleted(docNum)) {
 205         doc.add(deletionMark);
 206         // Add a special field for docs that are marked for deletion. Later we
 207         // assert that those docs are not returned by all-scored-doc-IDs.
 208         doc.add(new Field("del", Integer.toString(docNum), Store.YES, Index.NO));
 209       }
 210
 211       if (haveAlpha(docNum)) {
 212         doc.add(alphaContent);
 213       }
 214       return doc;
 215     }
 216
 217     public boolean haveAlpha(int docNum) {
 218       return false;
 219     }
 220   }
 221
 222   static IndexReader createReaderWithNDocs(Random random, int nDocs, DocumentFactory docFactory, Directory dir) throws IOException {
 223     RandomIndexWriter writer = new RandomIndexWriter(random, dir,
 224         newIndexWriterConfig(random, TEST_VERSION_CURRENT,
 225             new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
 226     for (int docNum = 0; docNum < nDocs; docNum++) {
 227       writer.addDocument(docFactory.getDoc(docNum));
 228     }
 229     writer.close();
 230
 231     // Delete documents marked for deletion
 232     IndexReader reader = IndexReader.open(dir, false);
 233     reader.deleteDocuments(new Term(DocumentFactory.field, DocumentFactory.delTxt));
 234     reader.close();
 235
 236     // Open a fresh read-only reader with the deletions in place
 237     return IndexReader.open(dir, true);
 238   }
 239 }