1 package org.apache.lucene.search.payloads;
3 * Licensed to the Apache Software Foundation (ASF) under one or more
4 * contributor license agreements. See the NOTICE file distributed with
5 * this work for additional information regarding copyright ownership.
6 * The ASF licenses this file to You under the Apache License, Version 2.0
7 * (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 import org.apache.lucene.util.LuceneTestCase;
20 import org.apache.lucene.util.English;
21 import org.apache.lucene.search.IndexSearcher;
22 import org.apache.lucene.search.QueryUtils;
23 import org.apache.lucene.search.TopDocs;
24 import org.apache.lucene.search.ScoreDoc;
25 import org.apache.lucene.search.CheckHits;
26 import org.apache.lucene.search.BooleanClause;
27 import org.apache.lucene.search.BooleanQuery;
28 import org.apache.lucene.search.DefaultSimilarity;
29 import org.apache.lucene.search.spans.SpanTermQuery;
30 import org.apache.lucene.search.spans.Spans;
31 import org.apache.lucene.search.spans.TermSpans;
32 import org.apache.lucene.analysis.Analyzer;
33 import org.apache.lucene.analysis.TokenStream;
34 import org.apache.lucene.analysis.LowerCaseTokenizer;
35 import org.apache.lucene.analysis.TokenFilter;
36 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
37 import org.apache.lucene.index.FieldInvertState;
38 import org.apache.lucene.index.IndexReader;
39 import org.apache.lucene.index.Payload;
40 import org.apache.lucene.index.RandomIndexWriter;
41 import org.apache.lucene.index.Term;
42 import org.apache.lucene.store.Directory;
43 import org.apache.lucene.document.Document;
44 import org.apache.lucene.document.Field;
46 import java.io.Reader;
47 import java.io.IOException;
54 public class TestPayloadTermQuery extends LuceneTestCase {
// Searcher/reader over the small synthetic index built in setUp().
55 private IndexSearcher searcher;
56 private IndexReader reader;
// Similarity whose score is driven entirely by payload bytes; every other
// scoring factor is neutralized to 1 (see BoostingSimilarity below).
57 private BoostingSimilarity similarity = new BoostingSimilarity();
// Payload bytes attached by PayloadFilter: "field" tokens always carry {1};
// "multiField" tokens carry {2} for even-numbered tokens and presumably {4}
// for odd ones (the else branch is not visible in this view -- confirm).
58 private byte[] payloadField = new byte[]{1};
59 private byte[] payloadMultiField1 = new byte[]{2};
60 private byte[] payloadMultiField2 = new byte[]{4};
// Index directory; protected so subclasses could reuse it.
61 protected Directory directory;
// Analyzer that lower-cases the input and chains a PayloadFilter so every
// emitted token carries a synthetic, field-dependent payload.
// NOTE(review): the trailing "return result;" and closing braces are not
// visible in this view -- lines appear elided; verify against the full file.
63 private class PayloadAnalyzer extends Analyzer {
67 public TokenStream tokenStream(String fieldName, Reader reader) {
68 TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
69 result = new PayloadFilter(result, fieldName);
// TokenFilter that stamps a payload onto each token based on the field name:
//  - "field": constant payload payloadField ({1})
//  - "multiField": payloadMultiField1 ({2}) when numSeen is even, otherwise
//    payloadMultiField2 ({4}) -- the else keyword and the numSeen increment
//    are not visible here (elided lines); confirm in the full source.
74 private class PayloadFilter extends TokenFilter {
75 private final String fieldName;
// Count of tokens seen so far; drives the even/odd payload alternation.
76 private int numSeen = 0;
78 private final PayloadAttribute payloadAtt;
80 public PayloadFilter(TokenStream input, String fieldName) {
82 this.fieldName = fieldName;
// Register the payload attribute once; reused for every token.
83 payloadAtt = addAttribute(PayloadAttribute.class);
// Advances the wrapped stream and, when a token is available, attaches the
// field-appropriate payload. NOTE(review): the "if (hasNext)" guard and the
// final "return hasNext;" are presumably on elided lines -- verify.
87 public boolean incrementToken() throws IOException {
88 boolean hasNext = input.incrementToken();
90 if (fieldName.equals("field")) {
91 payloadAtt.setPayload(new Payload(payloadField));
92 } else if (fieldName.equals("multiField")) {
93 if (numSeen % 2 == 0) {
94 payloadAtt.setPayload(new Payload(payloadMultiField1));
96 payloadAtt.setPayload(new Payload(payloadMultiField2));
// Resets the filter; presumably also resets numSeen so the even/odd payload
// alternation restarts per document (body elided in this view -- confirm).
107 public void reset() throws IOException {
// Builds a 1000-document index. Each document i stores the English spelling
// of i in three fields: a payload-free field, "field" (one copy, payload {1}
// per token), and "multiField" (two copies, alternating {2}/{4} payloads).
// NOTE(review): super.setUp(), the for-loop close, and writer.close() are on
// elided lines in this view.
114 public void setUp() throws Exception {
116 directory = newDirectory();
117 RandomIndexWriter writer = new RandomIndexWriter(random, directory,
118 newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
119 .setSimilarity(similarity).setMergePolicy(newLogMergePolicy()));
120 //writer.infoStream = System.out;
121 for (int i = 0; i < 1000; i++) {
122 Document doc = new Document();
123 Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
124 //noPayloadField.setBoost(0);
125 doc.add(noPayloadField);
126 doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
// "multiField" holds the text twice so every term matches twice per doc.
127 doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
128 writer.addDocument(doc);
130 reader = writer.getReader();
133 searcher = newSearcher(reader);
// All scoring in these tests flows through the payload-driven similarity.
134 searcher.setSimilarity(similarity);
// Releases the searcher/reader/directory opened in setUp(); the body is on
// elided lines in this view (presumably close() calls plus super.tearDown()).
138 public void tearDown() throws Exception {
// Single-payload case: "seventy" appears once per matching doc in "field",
// whose tokens all carry payload {1}, so with every other factor forced to 1
// every hit must score exactly 1.0.
145 public void test() throws IOException {
146 PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"),
147 new MaxPayloadFunction());
148 TopDocs hits = searcher.search(query, null, 100);
149 assertTrue("hits is null and it shouldn't be", hits != null);
// 100 of the 1000 docs spell out a number containing "seventy".
150 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
152 //they should all have the exact same score, because they all contain seventy once, and we set
153 //all the other similarity factors to be 1
155 assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
156 for (int i = 0; i < hits.scoreDocs.length; i++) {
157 ScoreDoc doc = hits.scoreDocs[i];
158 assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
160 CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
// Sanity-check the span machinery behind the query.
161 Spans spans = query.getSpans(searcher.getIndexReader());
162 assertTrue("spans is null and it shouldn't be", spans != null);
163 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
164 /*float score = hits.score(0);
165 for (int i =1; i < hits.length(); i++)
167 assertTrue("scores are not equal and they should be", score == hits.score(i));
// Equality/hashCode contract checks: a PayloadTermQuery must be symmetric in
// (in)equality versus a plain SpanTermQuery, and two PayloadTermQuerys with
// different payload functions must compare unequal.
172 public void testQuery() {
173 PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
174 new MaxPayloadFunction());
175 QueryUtils.check(boostingFuncTermQuery);
177 SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
// Only symmetry is asserted here, not the actual truth value of equals().
179 assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
181 PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
182 new AveragePayloadFunction());
184 QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
// Multi-payload case: "multiField" holds the text twice, so "seventy" matches
// twice per hit with payloads {2}/{4}; MaxPayloadFunction keeps the larger.
// NOTE(review): the declarations/increments of numTens and count, plus the
// else branch and several closing braces, are on elided lines in this view.
187 public void testMultipleMatchesPerDoc() throws Exception {
188 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
189 new MaxPayloadFunction());
190 TopDocs hits = searcher.search(query, null, 100);
191 assertTrue("hits is null and it shouldn't be", hits != null);
192 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
194 //they should all have the exact same score, because they all contain seventy once, and we set
195 //all the other similarity factors to be 1
197 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
198 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
199 //there should be exactly 10 items that score a 4, all the rest should score a 2
200 //The 10 items are: 70 + i*100 where i in [0-9]
202 for (int i = 0; i < hits.scoreDocs.length; i++) {
203 ScoreDoc doc = hits.scoreDocs[i];
204 if (doc.doc % 10 == 0) {
206 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
208 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
211 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
212 CheckHits.checkExplanations(query, "field", searcher, true);
213 Spans spans = query.getSpans(searcher.getIndexReader());
214 assertTrue("spans is null and it shouldn't be", spans != null);
215 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
216 //should be two matches per document
218 //100 hits times 2 matches per hit, we should have 200 in count
219 while (spans.next()) {
222 assertTrue(count + " does not equal: " + 200, count == 200);
225 //Set includeSpanScore to false, in which case just the payload score comes through.
// NOTE(review): theSearcher is created with FullSimilarity but the search on
// the next visible line still uses the field `searcher` -- this looks like a
// bug (theSearcher appears unused), though elided lines may use/close it;
// confirm against the full file before changing.
// NOTE(review): numTens/count declarations and several braces are elided here.
226 public void testIgnoreSpanScorer() throws Exception {
227 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
228 new MaxPayloadFunction(), false);
230 IndexSearcher theSearcher = new IndexSearcher(directory, true);
231 theSearcher.setSimilarity(new FullSimilarity());
232 TopDocs hits = searcher.search(query, null, 100);
233 assertTrue("hits is null and it shouldn't be", hits != null);
234 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
236 //they should all have the exact same score, because they all contain seventy once, and we set
237 //all the other similarity factors to be 1
239 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
240 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
241 //there should be exactly 10 items that score a 4, all the rest should score a 2
242 //The 10 items are: 70 + i*100 where i in [0-9]
244 for (int i = 0; i < hits.scoreDocs.length; i++) {
245 ScoreDoc doc = hits.scoreDocs[i];
246 if (doc.doc % 10 == 0) {
248 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
250 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
253 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
254 CheckHits.checkExplanations(query, "field", searcher, true);
255 Spans spans = query.getSpans(searcher.getIndexReader());
256 assertTrue("spans is null and it shouldn't be", spans != null);
257 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
258 //should be two matches per document
260 //100 hits times 2 matches per hit, we should have 200 in count
261 while (spans.next()) {
// A term that occurs in no document must produce zero hits (and not throw).
267 public void testNoMatch() throws Exception {
268 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"),
269 new MaxPayloadFunction());
270 TopDocs hits = searcher.search(query, null, 100);
271 assertTrue("hits is null and it shouldn't be", hits != null);
272 assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
// PayloadTermQuery over a field indexed WITHOUT payloads must still work as
// a plain term query inside a BooleanQuery (MUST "zero", MUST_NOT "foo").
// NOTE(review): the query.add(c1)/query.add(c2) calls are on elided lines in
// this view; the clauses are clearly intended to be added before searching.
276 public void testNoPayload() throws Exception {
277 PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
278 new MaxPayloadFunction());
279 PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
280 new MaxPayloadFunction());
281 BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
282 BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
283 BooleanQuery query = new BooleanQuery();
286 TopDocs hits = searcher.search(query, null, 100);
287 assertTrue("hits is null and it shouldn't be", hits != null);
// Only doc 0 ("zero") contains the MUST term without the excluded one.
288 assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
289 int[] results = new int[1];
290 results[0] = 0;//hits.scoreDocs[0].doc;
291 CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
294 // must be static for weight serialization tests
295 static class BoostingSimilarity extends DefaultSimilarity {
297 // TODO: Remove warning after API has been finalized
// Scores a match purely from its payload bytes; the return statement is on
// elided lines in this view (presumably reads payload[offset], since the
// test expects scores of exactly 1/2/4 matching the payload byte values).
299 public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
300 //we know it is size 4 here, so ignore the offset/length
304 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
305 //Make everything else 1 so we see the effect of the payload
306 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Norm reduced to the raw field boost (no length normalization).
308 public float computeNorm(String fieldName, FieldInvertState state) {
309 return state.getBoost();
// The remaining factors are neutralized; their "return 1;" bodies are on
// elided lines in this view.
313 public float queryNorm(float sumOfSquaredWeights) {
318 public float sloppyFreq(int distance) {
323 public float coord(int overlap, int maxOverlap) {
328 public float idf(int docFreq, int numDocs) {
// tf is flattened to 0/1 so term frequency cannot inflate scores.
333 public float tf(float freq) {
334 return freq == 0 ? 0 : 1;
338 static class FullSimilarity extends DefaultSimilarity{
339 public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
340 //we know it is size 4 here, so ignore the offset/length