lucene-java-3.4.0/lucene/contrib/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java

   1 package org.apache.lucene.facet.search;
   2
   3 import java.io.IOException;
   4 import java.io.Reader;
   5 import java.util.HashSet;
   6 import java.util.Set;
   7
   8 import org.apache.lucene.analysis.Analyzer;
   9 import org.apache.lucene.analysis.MockAnalyzer;
  10 import org.apache.lucene.analysis.MockTokenizer;
  11 import org.apache.lucene.analysis.TokenStream;
  12 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  13 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
  14 import org.apache.lucene.document.Document;
  15 import org.apache.lucene.document.Field;
  16 import org.apache.lucene.index.IndexReader;
  17 import org.apache.lucene.index.Payload;
  18 import org.apache.lucene.index.RandomIndexWriter;
  19 import org.apache.lucene.index.Term;
  20 import org.apache.lucene.store.Directory;
  21 import org.junit.Test;
  22
  23 import org.apache.lucene.util.LuceneTestCase;
  24 import org.apache.lucene.facet.search.CategoryListIterator;
  25 import org.apache.lucene.facet.search.PayloadIntDecodingIterator;
  26 import org.apache.lucene.util.UnsafeByteArrayOutputStream;
  27 import org.apache.lucene.util.encoding.DGapIntEncoder;
  28 import org.apache.lucene.util.encoding.IntEncoder;
  29 import org.apache.lucene.util.encoding.SortingIntEncoder;
  30 import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
  31 import org.apache.lucene.util.encoding.VInt8IntEncoder;
  32
  33 /**
  34  * Licensed to the Apache Software Foundation (ASF) under one or more
  35  * contributor license agreements.  See the NOTICE file distributed with
  36  * this work for additional information regarding copyright ownership.
  37  * The ASF licenses this file to You under the Apache License, Version 2.0
  38  * (the "License"); you may not use this file except in compliance with
  39  * the License.  You may obtain a copy of the License at
  40  *
  41  *     http://www.apache.org/licenses/LICENSE-2.0
  42  *
  43  * Unless required by applicable law or agreed to in writing, software
  44  * distributed under the License is distributed on an "AS IS" BASIS,
  45  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  46  * See the License for the specific language governing permissions and
  47  * limitations under the License.
  48  */
  49
  50 public class CategoryListIteratorTest extends LuceneTestCase {
  51
  52   private static final class DataTokenStream extends TokenStream {
  53
  54     private int idx;
  55     private PayloadAttribute payload = addAttribute(PayloadAttribute.class);
  56     private byte[] buf = new byte[20];
  57     UnsafeByteArrayOutputStream ubaos = new UnsafeByteArrayOutputStream(buf);
  58     IntEncoder encoder;
  59     private boolean exhausted = false;
  60     private CharTermAttribute term = addAttribute(CharTermAttribute.class);
  61
  62     public DataTokenStream(String text, IntEncoder encoder) throws IOException {
  63       this.encoder = encoder;
  64       term.setEmpty().append(text);
  65     }
  66
  67     public void setIdx(int idx) {
  68       this.idx = idx;
  69       exhausted = false;
  70     }
  71
  72     @Override
  73     public boolean incrementToken() throws IOException {
  74       if (exhausted) {
  75         return false;
  76       }
  77
  78       int[] values = data[idx];
  79       ubaos.reInit(buf);
  80       encoder.reInit(ubaos);
  81       for (int val : values) {
  82         encoder.encode(val);
  83       }
  84       encoder.close();
  85       payload.setPayload(new Payload(buf, 0, ubaos.length()));
  86
  87       exhausted = true;
  88       return true;
  89     }
  90
  91   }
  92
  93   static final int[][] data = new int[][] {
  94     new int[] { 1, 2 }, new int[] { 3, 4 }, new int[] { 1, 3 }, new int[] { 1, 2, 3, 4 },
  95   };
  96
  97   @Test
  98   public void testPayloadIntDecodingIterator() throws Exception {
  99     Directory dir = newDirectory();
 100     DataTokenStream dts = new DataTokenStream("1",new SortingIntEncoder(
 101         new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
 102     RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT,
 103         new MockAnalyzer(random, MockTokenizer.KEYWORD, false)).setMergePolicy(newLogMergePolicy()));
 104     for (int i = 0; i < data.length; i++) {
 105       dts.setIdx(i);
 106       Document doc = new Document();
 107       doc.add(new Field("f", dts));
 108       writer.addDocument(doc);
 109     }
 110     IndexReader reader = writer.getReader();
 111     writer.close();
 112
 113     CategoryListIterator cli = new PayloadIntDecodingIterator(reader, new Term(
 114         "f","1"), dts.encoder.createMatchingDecoder());
 115     cli.init();
 116     int totalCategories = 0;
 117     for (int i = 0; i < data.length; i++) {
 118       Set<Integer> values = new HashSet<Integer>();
 119       for (int j = 0; j < data[i].length; j++) {
 120         values.add(data[i][j]);
 121       }
 122       cli.skipTo(i);
 123       long cat;
 124       while ((cat = cli.nextCategory()) < Integer.MAX_VALUE) {
 125         assertTrue("expected category not found: " + cat, values.contains((int) cat));
 126         totalCategories ++;
 127       }
 128     }
 129     assertEquals("Missing categories!",10,totalCategories);
 130     reader.close();
 131     dir.close();
 132   }
 133
 134   /**
 135    * Test that a document with no payloads does not confuse the payload decoder.
 136    */
 137   @Test
 138   public void testPayloadIteratorWithInvalidDoc() throws Exception {
 139     Directory dir = newDirectory();
 140     DataTokenStream dts = new DataTokenStream("1",new SortingIntEncoder(
 141         new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
 142     DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
 143         new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
 144     // this test requires that no payloads ever be randomly present!
 145     final Analyzer noPayloadsAnalyzer = new Analyzer() {
 146       @Override
 147       public TokenStream tokenStream(String fieldName, Reader reader) {
 148         return new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
 149       }
 150     };
 151     // NOTE: test is wired to LogMP... because test relies on certain docids having payloads
 152     RandomIndexWriter writer = new RandomIndexWriter(random, dir,
 153         newIndexWriterConfig(TEST_VERSION_CURRENT, noPayloadsAnalyzer).setMergePolicy(newLogMergePolicy()));
 154     for (int i = 0; i < data.length; i++) {
 155       dts.setIdx(i);
 156       Document doc = new Document();
 157       if (i==0 || i == 2) {
 158         doc.add(new Field("f", dts)); // only docs 0 & 2 have payloads!
 159       }
 160       dts2.setIdx(i);
 161       doc.add(new Field("f", dts2));
 162       writer.addDocument(doc);
 163       writer.commit();
 164     }
 165
 166     // add more documents to expose the bug.
 167     // for some reason, this bug is not exposed unless these additional documents are added.
 168     for (int i = 0; i < 10; ++i) {
 169       Document d = new Document();
 170       dts.setIdx(2);
 171       d.add(new Field("f", dts2));
 172       writer.addDocument(d);
 173       if (i %10 == 0) {
 174         writer.commit();
 175       }
 176
 177     }
 178
 179     IndexReader reader = writer.getReader();
 180     writer.close();
 181
 182     CategoryListIterator cli = new PayloadIntDecodingIterator(reader, new Term(
 183         "f","1"), dts.encoder.createMatchingDecoder());
 184     cli.init();
 185     int totalCats = 0;
 186     for (int i = 0; i < data.length; i++) {
 187       // doc no. i
 188       Set<Integer> values = new HashSet<Integer>();
 189       for (int j = 0; j < data[i].length; j++) {
 190         values.add(data[i][j]);
 191       }
 192       boolean hasDoc = cli.skipTo(i);
 193       if (hasDoc) {
 194         assertTrue("Document "+i+" must not have a payload!", i==0 || i==2 );
 195         long cat;
 196         while ((cat = cli.nextCategory()) < Integer.MAX_VALUE) {
 197           assertTrue("expected category not found: " + cat, values.contains((int) cat));
 198           ++totalCats;
 199         }
 200       } else {
 201         assertFalse("Document "+i+" must have a payload!", i==0 || i==2 );
 202       }
 203
 204     }
 205     assertEquals("Wrong number of total categories!", 4, totalCats);
 206
 207     // Ok.. went through the first 4 docs, now lets try the 6th doc (docid 5)
 208     assertFalse("Doc #6 (docid=5) should not have a payload!",cli.skipTo(5));
 209     reader.close();
 210     dir.close();
 211   }
 212
 213 }