1 package org.apache.lucene.search.similar;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.io.StringReader;
22 import java.util.HashMap;
23 import java.util.List;
26 import org.apache.lucene.analysis.MockAnalyzer;
27 import org.apache.lucene.analysis.MockTokenizer;
28 import org.apache.lucene.document.Document;
29 import org.apache.lucene.document.Field;
30 import org.apache.lucene.index.IndexReader;
31 import org.apache.lucene.index.RandomIndexWriter;
32 import org.apache.lucene.search.BooleanClause;
33 import org.apache.lucene.search.BooleanQuery;
34 import org.apache.lucene.search.IndexSearcher;
35 import org.apache.lucene.search.TermQuery;
36 import org.apache.lucene.store.Directory;
37 import org.apache.lucene.util.LuceneTestCase;
39 public class TestMoreLikeThis extends LuceneTestCase {
// Shared fixture state; assigned in setUp().
// Directory the test documents are indexed into.
40 private Directory directory;
// Reader obtained from the index writer; each test builds its MoreLikeThis on it.
41 private IndexReader reader;
// Searcher opened over the reader in setUp().
42 private IndexSearcher searcher;
/**
 * Builds the fixture used by the tests: creates a directory, indexes two
 * documents ("lucene" and "lucene release") through a RandomIndexWriter, and
 * opens the reader and searcher over the result.
 *
 * @throws Exception if creating the index or opening the reader/searcher fails
 */
45 public void setUp() throws Exception {
47 directory = newDirectory();
48 RandomIndexWriter writer = new RandomIndexWriter(random, directory);
50 // Add a series of docs with specific content for MoreLikeThis to work on
51 addDoc(writer, "lucene");
52 addDoc(writer, "lucene release");
// The reader is taken from the writer itself rather than reopened from the directory.
54 reader = writer.getReader();
56 searcher = newSearcher(reader);
/**
 * Per-test cleanup hook.
 *
 * NOTE(review): the method body is not visible in this view; presumably it
 * releases the reader, searcher and directory opened in setUp() - confirm.
 *
 * @throws Exception if cleanup fails
 */
60 public void tearDown() throws Exception {
/**
 * Indexes one document whose only field, "text", holds the given string
 * (stored and analyzed).
 *
 * @param writer the index writer the document is added to
 * @param text   the value of the document's "text" field
 * @throws IOException declared for failures while adding the document
 */
67 private void addDoc(RandomIndexWriter writer, String text) throws IOException {
68 Document doc = new Document();
69 doc.add(newField("text", text, Field.Store.YES, Field.Index.ANALYZED));
70 writer.addDocument(doc);
/**
 * Checks that the boost factor scales term boosts: the query built for
 * "lucene release" with a boost factor of 5 must have the same number of
 * clauses as the baseline from getOriginalValues(), and each term's boost must
 * equal its baseline boost multiplied by the boost factor (tolerance 0.0001).
 *
 * @throws Throwable if building the query fails or an assertion does not hold
 */
73 public void testBoostFactor() throws Throwable {
// Baseline: term text -> boost, from a MoreLikeThis with no boost factor set.
74 Map<String,Float> originalValues = getOriginalValues();
76 MoreLikeThis mlt = new MoreLikeThis(reader);
77 mlt.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
79 mlt.setMinTermFreq(1);
81 mlt.setFieldNames(new String[] {"text"});
84 // every term boost is expected to be multiplied by the boost factor set below
86 float boostFactor = 5;
87 mlt.setBoostFactor(boostFactor);
89 BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
90 "lucene release"), "text");
91 List<BooleanClause> clauses = query.clauses();
// The boosted query must contain exactly the terms found in the baseline.
93 assertEquals("Expected " + originalValues.size() + " clauses.",
94 originalValues.size(), clauses.size());
96 for (int i = 0; i < clauses.size(); i++) {
97 BooleanClause clause = clauses.get(i);
// Every clause is cast to TermQuery; any other query type would fail the test here.
98 TermQuery tq = (TermQuery) clause.getQuery();
99 Float termBoost = originalValues.get(tq.getTerm().text());
100 assertNotNull("Expected term " + tq.getTerm().text(), termBoost);
// Expected boost = baseline boost for this term * boostFactor.
102 float totalBoost = termBoost.floatValue() * boostFactor;
103 assertEquals("Expected boost of " + totalBoost + " for term '"
104 + tq.getTerm().text() + "' got " + tq.getBoost(), totalBoost, tq
105 .getBoost(), 0.0001);
/**
 * Builds the baseline used by testBoostFactor(): runs MoreLikeThis over
 * "lucene release" on field "text" without setting a boost factor and records
 * the boost of every resulting term query.
 *
 * @return a map from term text to the boost of that term's query clause
 * @throws IOException declared for failures while building the query
 */
109 private Map<String,Float> getOriginalValues() throws IOException {
110 Map<String,Float> originalValues = new HashMap<String,Float>();
111 MoreLikeThis mlt = new MoreLikeThis(reader);
112 mlt.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
// All minimum thresholds are set to 1.
113 mlt.setMinDocFreq(1);
114 mlt.setMinTermFreq(1);
115 mlt.setMinWordLen(1);
116 mlt.setFieldNames(new String[] {"text"});
118 BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
119 "lucene release"), "text");
120 List<BooleanClause> clauses = query.clauses();
122 for (int i = 0; i < clauses.size(); i++) {
123 BooleanClause clause = clauses.get(i);
// Every clause is cast to TermQuery, as in testBoostFactor().
124 TermQuery tq = (TermQuery) clause.getQuery();
125 originalValues.put(tq.getTerm().text(), Float.valueOf(tq.getBoost()));
127 return originalValues;
/**
 * Calls like(Reader, String) on a MoreLikeThis configured with two field names
 * ("text" and "foobar"), passing "foobar" as the field name.
 *
 * No assertion on the result is visible here; the test appears to pass as long
 * as the call completes without throwing.
 *
 * @throws Exception if building the query fails
 */
131 public void testMultiFields() throws Exception {
132 MoreLikeThis mlt = new MoreLikeThis(reader);
133 mlt.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
134 mlt.setMinDocFreq(1);
135 mlt.setMinTermFreq(1);
136 mlt.setMinWordLen(1);
// setUp() only indexes a "text" field; "foobar" is never added to any document there.
137 mlt.setFieldNames(new String[] {"text", "foobar"});
138 mlt.like(new StringReader("this is a test"), "foobar");
/**
 * Same setup as testMultiFields(), but calls the single-argument like(Reader)
 * overload, which takes no field name.
 *
 * NOTE(review): the test name suggests this overload is deprecated - confirm
 * against MoreLikeThis. No assertion on the result is visible here; the test
 * appears to pass as long as the call completes without throwing.
 *
 * @throws Exception if building the query fails
 */
143 public void testMultiFieldsDeprecated() throws Exception {
144 MoreLikeThis mlt = new MoreLikeThis(reader);
145 mlt.setAnalyzer(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
146 mlt.setMinDocFreq(1);
147 mlt.setMinTermFreq(1);
148 mlt.setMinWordLen(1);
149 mlt.setFieldNames(new String[] {"text", "foobar"});
150 mlt.like(new StringReader("this is a test"));