package org.apache.lucene.search.payloads;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
54 public class TestPayloadTermQuery extends LuceneTestCase {
55 private IndexSearcher searcher;
56 private IndexReader reader;
57 private BoostingSimilarity similarity = new BoostingSimilarity();
58 private byte[] payloadField = new byte[]{1};
59 private byte[] payloadMultiField1 = new byte[]{2};
60 private byte[] payloadMultiField2 = new byte[]{4};
61 protected Directory directory;
63 private class PayloadAnalyzer extends Analyzer {
67 public TokenStream tokenStream(String fieldName, Reader reader) {
68 TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
69 result = new PayloadFilter(result, fieldName);
74 private class PayloadFilter extends TokenFilter {
78 PayloadAttribute payloadAtt;
80 public PayloadFilter(TokenStream input, String fieldName) {
82 this.fieldName = fieldName;
83 payloadAtt = addAttribute(PayloadAttribute.class);
87 public boolean incrementToken() throws IOException {
88 boolean hasNext = input.incrementToken();
90 if (fieldName.equals("field")) {
91 payloadAtt.setPayload(new Payload(payloadField));
92 } else if (fieldName.equals("multiField")) {
93 if (numSeen % 2 == 0) {
94 payloadAtt.setPayload(new Payload(payloadMultiField1));
96 payloadAtt.setPayload(new Payload(payloadMultiField2));
108 public void setUp() throws Exception {
110 directory = newDirectory();
111 RandomIndexWriter writer = new RandomIndexWriter(random, directory,
112 newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer())
113 .setSimilarity(similarity).setMergePolicy(newLogMergePolicy()));
114 //writer.infoStream = System.out;
115 for (int i = 0; i < 1000; i++) {
116 Document doc = new Document();
117 Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
118 //noPayloadField.setBoost(0);
119 doc.add(noPayloadField);
120 doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
121 doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
122 writer.addDocument(doc);
124 reader = writer.getReader();
127 searcher = newSearcher(reader);
128 searcher.setSimilarity(similarity);
132 public void tearDown() throws Exception {
139 public void test() throws IOException {
140 PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"),
141 new MaxPayloadFunction());
142 TopDocs hits = searcher.search(query, null, 100);
143 assertTrue("hits is null and it shouldn't be", hits != null);
144 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
146 //they should all have the exact same score, because they all contain seventy once, and we set
147 //all the other similarity factors to be 1
149 assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1);
150 for (int i = 0; i < hits.scoreDocs.length; i++) {
151 ScoreDoc doc = hits.scoreDocs[i];
152 assertTrue(doc.score + " does not equal: " + 1, doc.score == 1);
154 CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true);
155 Spans spans = query.getSpans(searcher.getIndexReader());
156 assertTrue("spans is null and it shouldn't be", spans != null);
157 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
158 /*float score = hits.score(0);
159 for (int i =1; i < hits.length(); i++)
161 assertTrue("scores are not equal and they should be", score == hits.score(i));
166 public void testQuery() {
167 PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
168 new MaxPayloadFunction());
169 QueryUtils.check(boostingFuncTermQuery);
171 SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"));
173 assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery));
175 PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
176 new AveragePayloadFunction());
178 QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2);
181 public void testMultipleMatchesPerDoc() throws Exception {
182 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
183 new MaxPayloadFunction());
184 TopDocs hits = searcher.search(query, null, 100);
185 assertTrue("hits is null and it shouldn't be", hits != null);
186 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
188 //they should all have the exact same score, because they all contain seventy once, and we set
189 //all the other similarity factors to be 1
191 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
192 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
193 //there should be exactly 10 items that score a 4, all the rest should score a 2
194 //The 10 items are: 70 + i*100 where i in [0-9]
196 for (int i = 0; i < hits.scoreDocs.length; i++) {
197 ScoreDoc doc = hits.scoreDocs[i];
198 if (doc.doc % 10 == 0) {
200 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
202 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
205 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
206 CheckHits.checkExplanations(query, "field", searcher, true);
207 Spans spans = query.getSpans(searcher.getIndexReader());
208 assertTrue("spans is null and it shouldn't be", spans != null);
209 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
210 //should be two matches per document
212 //100 hits times 2 matches per hit, we should have 200 in count
213 while (spans.next()) {
216 assertTrue(count + " does not equal: " + 200, count == 200);
219 //Set includeSpanScore to false, in which case just the payload score comes through.
220 public void testIgnoreSpanScorer() throws Exception {
221 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
222 new MaxPayloadFunction(), false);
224 IndexSearcher theSearcher = new IndexSearcher(directory, true);
225 theSearcher.setSimilarity(new FullSimilarity());
226 TopDocs hits = searcher.search(query, null, 100);
227 assertTrue("hits is null and it shouldn't be", hits != null);
228 assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
230 //they should all have the exact same score, because they all contain seventy once, and we set
231 //all the other similarity factors to be 1
233 //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
234 assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
235 //there should be exactly 10 items that score a 4, all the rest should score a 2
236 //The 10 items are: 70 + i*100 where i in [0-9]
238 for (int i = 0; i < hits.scoreDocs.length; i++) {
239 ScoreDoc doc = hits.scoreDocs[i];
240 if (doc.doc % 10 == 0) {
242 assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
244 assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
247 assertTrue(numTens + " does not equal: " + 10, numTens == 10);
248 CheckHits.checkExplanations(query, "field", searcher, true);
249 Spans spans = query.getSpans(searcher.getIndexReader());
250 assertTrue("spans is null and it shouldn't be", spans != null);
251 assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans);
252 //should be two matches per document
254 //100 hits times 2 matches per hit, we should have 200 in count
255 while (spans.next()) {
261 public void testNoMatch() throws Exception {
262 PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"),
263 new MaxPayloadFunction());
264 TopDocs hits = searcher.search(query, null, 100);
265 assertTrue("hits is null and it shouldn't be", hits != null);
266 assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0);
270 public void testNoPayload() throws Exception {
271 PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"),
272 new MaxPayloadFunction());
273 PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"),
274 new MaxPayloadFunction());
275 BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST);
276 BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT);
277 BooleanQuery query = new BooleanQuery();
280 TopDocs hits = searcher.search(query, null, 100);
281 assertTrue("hits is null and it shouldn't be", hits != null);
282 assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1);
283 int[] results = new int[1];
284 results[0] = 0;//hits.scoreDocs[0].doc;
285 CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results);
288 // must be static for weight serialization tests
289 static class BoostingSimilarity extends DefaultSimilarity {
291 // TODO: Remove warning after API has been finalized
293 public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) {
294 //we know it is size 4 here, so ignore the offset/length
298 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
299 //Make everything else 1 so we see the effect of the payload
300 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
302 public float computeNorm(String fieldName, FieldInvertState state) {
303 return state.getBoost();
307 public float queryNorm(float sumOfSquaredWeights) {
312 public float sloppyFreq(int distance) {
317 public float coord(int overlap, int maxOverlap) {
322 public float idf(int docFreq, int numDocs) {
327 public float tf(float freq) {
328 return freq == 0 ? 0 : 1;
332 static class FullSimilarity extends DefaultSimilarity{
333 public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) {
334 //we know it is size 4 here, so ignore the offset/length