1 package org.apache.lucene.search;
4 * Licensed to the Apache Software Foundation (ASF) under one or more
5 * contributor license agreements. See the NOTICE file distributed with
6 * this work for additional information regarding copyright ownership.
7 * The ASF licenses this file to You under the Apache License, Version 2.0
8 * (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 import java.io.IOException;
21 import java.util.HashMap;
22 import java.util.Iterator;
23 import java.util.Locale;
24 import java.util.Random;
26 import org.apache.lucene.analysis.SimpleAnalyzer;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Field;
29 import org.apache.lucene.index.IndexReader;
30 import org.apache.lucene.index.IndexWriter;
31 import org.apache.lucene.index.Term;
32 import org.apache.lucene.store.Directory;
33 import org.junit.AfterClass;
34 import org.junit.Before;
35 import org.junit.BeforeClass;
36 import org.junit.Test;
39 * Unit tests for remote sorting code.
40 * Note: This is a modified copy of {@link TestSort} without duplicated test
41 * methods and therefore unused members and methods.
44 public class TestRemoteSort extends RemoteTestCase {
// Searcher opened over indexStore in beforeClass(), with default field-sort scoring enabled there.
46 private static IndexSearcher full;
// Directory holding the test index; obtained from newDirectory() in beforeClass().
47 private static Directory indexStore;
// Test corpus: one row per document, one column per index field.
// beforeClass() maps the columns, in order, to the fields
// tracer, contents, int, float, string, custom, i18n, long, double, short, byte, parser.
// tracer and contents are always added; a null in any later column means that
// field is simply not added to the document.
55 // the tracer field is used to determine which document was hit
56 // the contents field is used to search and sort by relevance
57 // the int field to sort by int
58 // the float field to sort by float
59 // the string field to sort by string
60 // the i18n field includes accented characters for testing locale-specific sorting
61 private static final String[][] data = new String[][] {
62 // tracer contents int float string custom i18n long double, 'short', byte, 'custom parser encoding'
63 { "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche", "10", "-4.0", "3", "126", "J"},//A, x
64 { "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1", "I"},//B, y
65 { "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9", "99999999", "40.00002343", "125", "15", "H"},//C, x
66 { "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", String.valueOf(Long.MAX_VALUE), String.valueOf(Double.MIN_VALUE), String.valueOf(Short.MIN_VALUE), String.valueOf(Byte.MIN_VALUE), "G"},//D, y
67 { "E", "x a b c d", "5", "2f", "h", "B-8", "peach", String.valueOf(Long.MIN_VALUE), String.valueOf(Double.MAX_VALUE), String.valueOf(Short.MAX_VALUE), String.valueOf(Byte.MAX_VALUE), "F"},//E,x
68 { "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T", "-44", "343.034435444", "-3", "0", "E"},//F,y
69 { "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100", "D"},//G,x
70 { "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T", "1023423423005","4.043545", "10", "-50", "C"},//H,y
71 { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn", "332422459999", "4.043546", "-340", "51", "B"},//I,x
72 { "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2", "A"},//J,y
// rows W-Z carry few or no sort fields, so they exercise sorting of documents with missing values
73 { "W", "g", "1", null, null, null, null, null, null, null, null, null},
74 { "X", "g", "1", "0.1", null, null, null, null, null, null, null, null},
75 { "Y", "g", "1", "0.2", null, null, null, null, null, null, null, null},
76 { "Z", "f g", null, null, null, null, null, null, null, null, null, null}
79 // create a single index containing one document for every row of data
// "tracer" is stored but not indexed so tests can identify hits, "contents" is
// analyzed for searching, and every other column is indexed un-analyzed
// (and skipped when its entry is null).
81 public static void beforeClass() throws Exception {
82 indexStore = newDirectory();
// NOTE(review): a 2-document buffer with a merge factor of 1000 presumably forces
// many small, unmerged segments - confirm the intent.
83 IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(
84 TEST_VERSION_CURRENT, new SimpleAnalyzer(TEST_VERSION_CURRENT))
85 .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
86 setMergeFactor(writer.getConfig().getMergePolicy(), 1000);
87 for (int i=0; i<data.length; ++i) {
88 Document doc = new Document();
// columns 0 and 1 are mandatory for every row
89 doc.add (new Field ("tracer", data[i][0], Field.Store.YES, Field.Index.NO));
90 doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED));
// columns 2..11 are optional: a null entry leaves the field out of the document
91 if (data[i][2] != null) doc.add (new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED));
92 if (data[i][3] != null) doc.add (new Field ("float", data[i][3], Field.Store.NO, Field.Index.NOT_ANALYZED));
93 if (data[i][4] != null) doc.add (new Field ("string", data[i][4], Field.Store.NO, Field.Index.NOT_ANALYZED));
94 if (data[i][5] != null) doc.add (new Field ("custom", data[i][5], Field.Store.NO, Field.Index.NOT_ANALYZED));
95 if (data[i][6] != null) doc.add (new Field ("i18n", data[i][6], Field.Store.NO, Field.Index.NOT_ANALYZED));
96 if (data[i][7] != null) doc.add (new Field ("long", data[i][7], Field.Store.NO, Field.Index.NOT_ANALYZED));
97 if (data[i][8] != null) doc.add (new Field ("double", data[i][8], Field.Store.NO, Field.Index.NOT_ANALYZED));
98 if (data[i][9] != null) doc.add (new Field ("short", data[i][9], Field.Store.NO, Field.Index.NOT_ANALYZED));
99 if (data[i][10] != null) doc.add (new Field ("byte", data[i][10], Field.Store.NO, Field.Index.NOT_ANALYZED));
100 if (data[i][11] != null) doc.add (new Field ("parser", data[i][11], Field.Store.NO, Field.Index.NOT_ANALYZED));
101 doc.setBoost(2); // produce some scores above 1.0
102 writer.addDocument (doc);
104 //writer.optimize ();
106 full = new IndexSearcher (indexStore, false);
// NOTE(review): presumably turns on score tracking for field-sorted searches, which
// testNormalizedScores relies on - confirm against IndexSearcher.setDefaultFieldSortScoring.
107 full.setDefaultFieldSortScoring(true, true);
112 public static void afterClass() throws Exception {
// Returns the concatenation of num values produced by getRandomNumber(low, high),
// each appended in its decimal text form.
119 public String getRandomNumberString(int num, int low, int high) {
120 StringBuilder sb = new StringBuilder();
121 for (int i = 0; i < num; i++) {
122 sb.append(getRandomNumber(low, high));
124 return sb.toString();
// Convenience overload: random string of num characters using the default
// bounds start=48 (ASCII '0') and end=122 (ASCII 'z').
127 public String getRandomCharString(int num) {
128 return getRandomCharString(num, 48, 122);
// Builds a string of num characters; each character is getRandomNumber(start, end)
// cast to char.
131 public String getRandomCharString(int num, int start, int end) {
132 StringBuilder sb = new StringBuilder();
133 for (int i = 0; i < num; i++) {
// NOTE(review): new Character(char) is deprecated boxing; appending the char
// directly would produce the same text.
134 sb.append(new Character((char) getRandomNumber(start, end)));
136 return sb.toString();
// Derives a pseudo-random int from r: |r.nextInt()| modulo (high - low), shifted up by low.
// NOTE(review): Math.abs(Integer.MIN_VALUE) is still negative, so for that one draw
// the result can fall below low; high == low makes the modulo divide by zero.
// r is declared outside the lines shown here.
141 public int getRandomNumber(final int low, final int high) {
143 int randInt = (Math.abs(r.nextInt()) % (high - low)) + low;
// Creates the single-term queries on the "contents" field that the tests search with
// (terms "x", "y", "a" and "f").
150 public void setUp() throws Exception {
152 queryX = new TermQuery (new Term ("contents", "x"));
153 queryY = new TermQuery (new Term ("contents", "y"));
154 queryA = new TermQuery (new Term ("contents", "a"));
155 queryF = new TermQuery (new Term ("contents", "f"));
// Custom comparator that orders hits by an int decoded from the first character
// of the "parser" field: (char - 'A') * 123456.
160 static class MyFieldComparator extends FieldComparator {
// allocates one value slot per hit that the collector may keep
165 MyFieldComparator(int numHits) {
166 slotValues = new int[numHits];
// remembers the decoded value of document doc in the given slot
170 public void copy(int slot, int doc) {
171 slotValues[slot] = docValues[doc];
// NOTE(review): subtraction-based comparison can overflow for widely separated ints.
// For the "parser" values in data ('A'..'J', i.e. 0..1111104) it stays in range;
// Integer.compare would be the overflow-safe form.
175 public int compare(int slot1, int slot2) {
176 return slotValues[slot1] - slotValues[slot2];
// compares the current bottom value against document doc (same subtraction caveat as compare)
180 public int compareBottom(int doc) {
181 return bottomValue - docValues[doc];
// caches the value held in slot "bottom" for use by compareBottom
185 public void setBottom(int bottom) {
186 bottomValue = slotValues[bottom];
// loads the per-document ints for this reader from the FieldCache, using the custom parser below
190 public void setNextReader(IndexReader reader, int docBase) throws IOException {
191 docValues = FieldCache.DEFAULT.getInts(reader, "parser", new FieldCache.IntParser() {
192 public final int parseInt(final String val) {
// only the first character of the term is significant
193 return (val.charAt(0)-'A') * 123456;
// returns the slot's value boxed as an Integer
199 public Comparable<?> value(int slot) {
200 return Integer.valueOf(slotValues[slot]);
// Factory that hands out a MyFieldComparator sized for numHits; the fieldname,
// sortPos and reversed arguments are not used in the line shown.
204 static class MyFieldComparatorSource extends FieldComparatorSource {
206 public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
207 return new MyFieldComparator(numHits);
211 // test a variety of sorts using a remote searcher
// The Searchable returned by lookupRemote() is wrapped in a MultiSearcher and run
// through runMultiSorts with isFull=true.
213 public void testRemoteSort() throws Exception {
214 Searchable searcher = lookupRemote();
215 MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher });
216 runMultiSorts(multi, true); // this runs on the full index
219 // test that the relevancy scores are the same even if the hits are sorted differently
// Baseline scores come from the local searcher "full" with no sort; each sorted
// search below goes through the remote MultiSearcher and must give every tracer
// the same score (compared via assertSameValues).
222 public void testNormalizedScores() throws Exception {
224 // capture relevancy scores
225 HashMap<String,Float> scoresX = getScores (full.search (queryX, null, 1000).scoreDocs, full);
226 HashMap<String,Float> scoresY = getScores (full.search (queryY, null, 1000).scoreDocs, full);
227 HashMap<String,Float> scoresA = getScores (full.search (queryA, null, 1000).scoreDocs, full);
229 // we'll test searching locally, remote and multi
230 MultiSearcher remote = new MultiSearcher (new Searchable[] { lookupRemote() });
232 // change sorting and make sure relevancy stays the same
// first pass: uses sort in whatever state it has when this point is reached
235 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
236 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
237 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by document order
239 sort.setSort(SortField.FIELD_DOC);
240 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
241 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
242 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by the "int" field
244 sort.setSort (new SortField("int", SortField.INT));
245 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
246 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
247 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by the "float" field
249 sort.setSort (new SortField("float", SortField.FLOAT));
250 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
251 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
252 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by the "string" field
254 sort.setSort (new SortField("string", SortField.STRING));
255 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
256 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
257 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by "int", then "float"
259 sort.setSort (new SortField("int", SortField.INT), new SortField("float", SortField.FLOAT));
260 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
261 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
262 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by "int" and then document order, both with the reverse flag set
264 sort.setSort (new SortField ("int", SortField.INT, true), new SortField (null, SortField.DOC, true) );
265 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
266 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
267 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
// by "float", then "string"
269 sort.setSort (new SortField("float", SortField.FLOAT), new SortField("string", SortField.STRING));
270 assertSameValues (scoresX, getScores (remote.search (queryX, null, 1000, sort).scoreDocs, remote));
271 assertSameValues (scoresY, getScores (remote.search (queryY, null, 1000, sort).scoreDocs, remote));
272 assertSameValues (scoresA, getScores (remote.search (queryA, null, 1000, sort).scoreDocs, remote));
275 // runs a variety of sorts useful for multisearchers
// Each expected string is the sequence of "tracer" values, in result order, that
// assertMatches must see. isFull selects between two expected orders where they
// differ; the only caller shown passes true.
276 private void runMultiSorts(Searcher multi, boolean isFull) throws Exception {
277 sort.setSort(SortField.FIELD_DOC);
278 String expected = isFull ? "ABCDEFGHIJ" : "ACEGIBDFHJ";
279 assertMatches(multi, queryA, sort, expected);
281 sort.setSort(new SortField ("int", SortField.INT));
282 expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC";
283 assertMatches(multi, queryA, sort, expected);
285 sort.setSort(new SortField ("int", SortField.INT), SortField.FIELD_DOC);
286 expected = isFull ? "IDHFGJABEC" : "IDHFGJAEBC";
287 assertMatches(multi, queryA, sort, expected);
289 sort.setSort(new SortField ("float", SortField.FLOAT), SortField.FIELD_DOC);
290 assertMatches(multi, queryA, sort, "GDHJCIEFAB");
292 sort.setSort(new SortField("float", SortField.FLOAT));
293 assertMatches(multi, queryA, sort, "GDHJCIEFAB");
295 sort.setSort(new SortField("string", SortField.STRING));
296 assertMatches(multi, queryA, sort, "DJAIHGFEBC");
// the same three fields again with the reverse flag set
298 sort.setSort(new SortField ("int", SortField.INT, true));
299 expected = isFull ? "CABEJGFHDI" : "CAEBJGFHDI";
300 assertMatches(multi, queryA, sort, expected);
302 sort.setSort(new SortField ("float", SortField.FLOAT, true));
303 assertMatches(multi, queryA, sort, "BAFECIJHDG");
305 sort.setSort(new SortField ("string", SortField.STRING, true));
306 assertMatches(multi, queryA, sort, "CBEFGHIAJD");
// two-level sorts
308 sort.setSort(new SortField ("int", SortField.INT), new SortField ("float", SortField.FLOAT));
309 assertMatches(multi, queryA, sort, "IDHFGJEABC");
311 sort.setSort(new SortField ("float", SortField.FLOAT), new SortField ("string", SortField.STRING));
312 assertMatches(multi, queryA, sort, "GDHJICEFAB");
// queryF matches documents I, J and Z; Z has none of the sort fields
314 sort.setSort(new SortField ("int", SortField.INT));
315 assertMatches(multi, queryF, sort, "IZJ");
317 sort.setSort(new SortField ("int", SortField.INT, true));
318 assertMatches(multi, queryF, sort, "JZI");
320 sort.setSort(new SortField ("float", SortField.FLOAT));
321 assertMatches(multi, queryF, sort, "ZJI");
323 sort.setSort(new SortField ("string", SortField.STRING));
324 assertMatches(multi, queryF, sort, "ZJI");
326 sort.setSort(new SortField ("string", SortField.STRING, true));
327 assertMatches(multi, queryF, sort, "IJZ");
329 // up to this point, all of the searches should have "sane"
330 // FieldCache behavior, and should have reused the cache in several cases
331 assertSaneFieldCaches(getName() + " Basics");
332 // next we'll check an alternate Locale for string, so purge first
333 FieldCache.DEFAULT.purgeAllCaches();
335 sort.setSort(new SortField ("string", Locale.US) );
336 assertMatches(multi, queryA, sort, "DJAIHGFEBC");
338 sort.setSort(new SortField ("string", Locale.US, true));
339 assertMatches(multi, queryA, sort, "CBEFGHIAJD");
341 assertSaneFieldCaches(getName() + " Locale.US");
342 FieldCache.DEFAULT.purgeAllCaches();
345 // make sure the documents returned by the search match the expected list
// Requests expectedResult.length() hits, checks totalHits against that length, then
// compares the text collected in buff from the hits' "tracer" values with expectedResult.
346 private void assertMatches(Searcher searcher, Query query, Sort sort,
347 String expectedResult) throws IOException {
348 //ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
349 TopDocs hits = searcher.search (query, null, expectedResult.length(), sort);
350 ScoreDoc[] result = hits.scoreDocs;
// NOTE(review): JUnit's assertEquals takes (expected, actual); the arguments here are
// in the opposite order, which only affects the wording of a failure message.
351 assertEquals(hits.totalHits, expectedResult.length());
352 StringBuilder buff = new StringBuilder(10);
353 int n = result.length;
354 for (int i=0; i<n; ++i) {
355 Document doc = searcher.doc(result[i].doc);
356 String[] v = doc.getValues("tracer");
357 for (int j=0; j<v.length; ++j) {
361 assertEquals (expectedResult, buff.toString());
// Builds a map from each hit's "tracer" value to its score. Every hit's document is
// loaded through searcher and must carry exactly one "tracer" value.
364 private HashMap<String, Float> getScores (ScoreDoc[] hits, Searcher searcher)
366 HashMap<String, Float> scoreMap = new HashMap<String, Float>();
368 for (int i=0; i<n; ++i) {
369 Document doc = searcher.doc(hits[i].doc);
370 String[] v = doc.getValues("tracer");
// NOTE(review): arguments are (actual, expected), the reverse of JUnit's convention
371 assertEquals (v.length, 1);
372 scoreMap.put (v[0], Float.valueOf(hits[i].score));
377 // make sure all the values in the maps match
// Walks the keys of m1 and compares the value stored under each key in m1 and m2.
378 private void assertSameValues (HashMap<?, ?> m1, HashMap<?, ?> m2) {
382 Iterator<?> iter = m1.keySet().iterator();
383 while (iter.hasNext()) {
384 Object key = iter.next();
385 Object o1 = m1.get(key);
386 Object o2 = m2.get(key);
387 if (o1 instanceof Float) {
// scores are floats, so they are compared with a 1e-6 tolerance
388 assertEquals(((Float)o1).floatValue(), ((Float)o2).floatValue(), 1e-6);
// exact equality check on the two stored values
390 assertEquals (m1.get(key), m2.get(key));