X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java diff --git a/lucene-java-3.4.0/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java b/lucene-java-3.4.0/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java deleted file mode 100644 index 326c50b..0000000 --- a/lucene-java-3.4.0/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java +++ /dev/null @@ -1,490 +0,0 @@ -package org.apache.lucene.search.grouping; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.NumericField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.*; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; - -import java.io.IOException; -import java.util.*; - -public class TermAllGroupHeadsCollectorTest extends LuceneTestCase { - - public void testBasic() throws Exception { - final String groupField = "author"; - Directory dir = newDirectory(); - RandomIndexWriter w = new RandomIndexWriter( - random, - dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); - - // 0 - Document doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - // 1 - doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text blob", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "2", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - // 2 - doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random textual data", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "3", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - w.commit(); // To ensure a second segment - - // 3 - doc = new Document(); - doc.add(new Field(groupField, "author2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "4", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - // 4 - doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "5", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - // 5 - doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random blob", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - // 6 -- no author field - doc = new Document(); - doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - // 7 -- no author field - doc = new Document(); - doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "7", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - w.addDocument(doc); - - IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); - w.close(); - int maxDoc = indexSearcher.maxDoc(); - - Sort sortWithinGroup = new Sort(new SortField("id", SortField.INT, true)); - AbstractAllGroupHeadsCollector c1 = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup); - indexSearcher.search(new TermQuery(new Term("content", "random")), c1); - assertTrue(arrayContains(new int[]{2, 3, 5, 7}, c1.retrieveGroupHeads())); - assertTrue(openBitSetContains(new int[]{2, 3, 5, 7}, c1.retrieveGroupHeads(maxDoc), maxDoc)); - - AbstractAllGroupHeadsCollector c2 = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup); - indexSearcher.search(new TermQuery(new Term("content", "some")), c2); - assertTrue(arrayContains(new int[]{2, 3, 4}, c2.retrieveGroupHeads())); - assertTrue(openBitSetContains(new int[]{2, 3, 4}, c2.retrieveGroupHeads(maxDoc), maxDoc)); - - AbstractAllGroupHeadsCollector c3 = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup); - indexSearcher.search(new TermQuery(new Term("content", "blob")), c3); - assertTrue(arrayContains(new int[]{1, 5}, c3.retrieveGroupHeads())); - assertTrue(openBitSetContains(new int[]{1, 5}, c3.retrieveGroupHeads(maxDoc), maxDoc)); - - // STRING sort type triggers different implementation - Sort sortWithinGroup2 = new Sort(new SortField("id", SortField.STRING, true)); - AbstractAllGroupHeadsCollector c4 = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup2); - indexSearcher.search(new TermQuery(new Term("content", "random")), c4); - assertTrue(arrayContains(new int[]{2, 3, 5, 7}, c4.retrieveGroupHeads())); - assertTrue(openBitSetContains(new int[]{2, 3, 5, 7}, c4.retrieveGroupHeads(maxDoc), maxDoc)); - - Sort sortWithinGroup3 = new Sort(new SortField("id", SortField.STRING, false)); - AbstractAllGroupHeadsCollector c5 = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup3); - indexSearcher.search(new TermQuery(new Term("content", "random")), c5); - // 7 b/c higher doc id wins, even if order of field is in not in reverse. - assertTrue(arrayContains(new int[]{0, 3, 4, 6}, c5.retrieveGroupHeads())); - assertTrue(openBitSetContains(new int[]{0, 3, 4, 6}, c5.retrieveGroupHeads(maxDoc), maxDoc)); - - indexSearcher.getIndexReader().close(); - dir.close(); - } - - public void testRandom() throws Exception { - int numberOfRuns = _TestUtil.nextInt(random, 3, 6); - for (int iter = 0; iter < numberOfRuns; iter++) { - if (VERBOSE) { - System.out.println(String.format("TEST: iter=%d total=%d", iter, numberOfRuns)); - } - - final int numDocs = _TestUtil.nextInt(random, 100, 1000) * RANDOM_MULTIPLIER; - final int numGroups = _TestUtil.nextInt(random, 1, numDocs); - - if (VERBOSE) { - System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups); - } - - final List groups = new ArrayList(); - for (int i = 0; i < numGroups; i++) { - groups.add(_TestUtil.randomRealisticUnicodeString(random)); - } - final String[] contentStrings = new String[_TestUtil.nextInt(random, 2, 20)]; - if (VERBOSE) { - System.out.println("TEST: create fake content"); - } - for (int contentIDX = 0; contentIDX < contentStrings.length; contentIDX++) { - final StringBuilder sb = new StringBuilder(); - sb.append("real").append(random.nextInt(3)).append(' '); - final int fakeCount = random.nextInt(10); - for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++) { - sb.append("fake "); - } - contentStrings[contentIDX] = sb.toString(); - if (VERBOSE) { - System.out.println(" content=" + sb.toString()); - } - } - - Directory dir = newDirectory(); - RandomIndexWriter w = new RandomIndexWriter( - random, - dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random))); - - Document doc = new Document(); - Document docNoGroup = new Document(); - Field group = newField("group", "", Field.Index.NOT_ANALYZED); - doc.add(group); - Field sort1 = newField("sort1", "", Field.Index.NOT_ANALYZED); - doc.add(sort1); - docNoGroup.add(sort1); - Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED); - doc.add(sort2); - docNoGroup.add(sort2); - Field sort3 = newField("sort3", "", Field.Index.NOT_ANALYZED); - doc.add(sort3); - docNoGroup.add(sort3); - Field content = newField("content", "", Field.Index.ANALYZED); - doc.add(content); - docNoGroup.add(content); - NumericField id = new NumericField("id"); - doc.add(id); - docNoGroup.add(id); - final GroupDoc[] groupDocs = new GroupDoc[numDocs]; - for (int i = 0; i < numDocs; i++) { - final String groupValue; - if (random.nextInt(24) == 17) { - // So we test the "doc doesn't have the group'd - // field" case: - groupValue = null; - } else { - groupValue = groups.get(random.nextInt(groups.size())); - } - - final GroupDoc groupDoc = new GroupDoc( - i, - groupValue, - groups.get(random.nextInt(groups.size())), - groups.get(random.nextInt(groups.size())), - String.format("%05d", i), - contentStrings[random.nextInt(contentStrings.length)] - ); - - if (VERBOSE) { - System.out.println(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group) + " sort1=" + groupDoc.sort1 + " sort2=" + groupDoc.sort2 + " sort3=" + groupDoc.sort3); - } - - groupDocs[i] = groupDoc; - if (groupDoc.group != null) { - group.setValue(groupDoc.group); - } - sort1.setValue(groupDoc.sort1); - sort2.setValue(groupDoc.sort2); - sort3.setValue(groupDoc.sort3); - content.setValue(groupDoc.content); - id.setIntValue(groupDoc.id); - if (groupDoc.group == null) { - w.addDocument(docNoGroup); - } else { - w.addDocument(doc); - } - } - - final IndexReader r = w.getReader(); - w.close(); - - // NOTE: intentional but temporary field cache insanity! - final int[] docIdToFieldId = FieldCache.DEFAULT.getInts(r, "id"); - final int[] fieldIdToDocID = new int[numDocs]; - for (int i = 0; i < docIdToFieldId.length; i++) { - int fieldId = docIdToFieldId[i]; - fieldIdToDocID[fieldId] = i; - } - - try { - final IndexSearcher s = newSearcher(r); - - for (int contentID = 0; contentID < 3; contentID++) { - final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs; - for (ScoreDoc hit : hits) { - final GroupDoc gd = groupDocs[docIdToFieldId[hit.doc]]; - assertTrue(gd.score == 0.0); - gd.score = hit.score; - int docId = gd.id; - assertEquals(docId, docIdToFieldId[hit.doc]); - } - } - - for (GroupDoc gd : groupDocs) { - assertTrue(gd.score != 0.0); - } - - for (int searchIter = 0; searchIter < 100; searchIter++) { - - if (VERBOSE) { - System.out.println("TEST: searchIter=" + searchIter); - } - - final String searchTerm = "real" + random.nextInt(3); - boolean sortByScoreOnly = random.nextBoolean(); - Sort sortWithinGroup = getRandomSort(sortByScoreOnly); - AbstractAllGroupHeadsCollector allGroupHeadsCollector = TermAllGroupHeadsCollector.create("group", sortWithinGroup); - s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector); - int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID); - int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads(); - // The actual group heads contains Lucene ids. Need to change them into our id value. - for (int i = 0; i < actualGroupHeads.length; i++) { - actualGroupHeads[i] = docIdToFieldId[actualGroupHeads[i]]; - } - // Allows us the easily iterate and assert the actual and expected results. - Arrays.sort(expectedGroupHeads); - Arrays.sort(actualGroupHeads); - - if (VERBOSE) { - System.out.println("Collector: " + allGroupHeadsCollector.getClass().getSimpleName()); - System.out.println("Sort within group: " + sortWithinGroup); - System.out.println("Num group: " + numGroups); - System.out.println("Num doc: " + numDocs); - System.out.println("\n=== Expected: \n"); - for (int expectedDocId : expectedGroupHeads) { - GroupDoc expectedGroupDoc = groupDocs[expectedDocId]; - String expectedGroup = expectedGroupDoc.group == null ? null : expectedGroupDoc.group; - System.out.println( - String.format( - "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", - expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1, - expectedGroupDoc.sort2, expectedGroupDoc.sort3, expectedDocId - ) - ); - } - System.out.println("\n=== Actual: \n"); - for (int actualDocId : actualGroupHeads) { - GroupDoc actualGroupDoc = groupDocs[actualDocId]; - String actualGroup = actualGroupDoc.group == null ? null : actualGroupDoc.group; - System.out.println( - String.format( - "Group:%10s score%5f Sort1:%10s Sort2:%10s Sort3:%10s doc:%5d", - actualGroup, actualGroupDoc.score, actualGroupDoc.sort1, - actualGroupDoc.sort2, actualGroupDoc.sort3, actualDocId - ) - ); - } - System.out.println("\n==================================================================================="); - } - - assertEquals(expectedGroupHeads.length, actualGroupHeads.length); - for (int i = 0; i < expectedGroupHeads.length; i++) { - assertEquals(expectedGroupHeads[i], actualGroupHeads[i]); - } - } - s.close(); - } finally { - FieldCache.DEFAULT.purge(r); - } - - r.close(); - dir.close(); - } - } - - - private boolean arrayContains(int[] expected, int[] actual) { - if (expected.length != actual.length) { - return false; - } - - for (int e : expected) { - boolean found = false; - for (int a : actual) { - if (e == a) { - found = true; - } - } - - if (!found) { - return false; - } - } - - return true; - } - - private boolean openBitSetContains(int[] expectedDocs, FixedBitSet actual, int maxDoc) throws IOException { - if (expectedDocs.length != actual.cardinality()) { - return false; - } - - FixedBitSet expected = new FixedBitSet(maxDoc); - for (int expectedDoc : expectedDocs) { - expected.set(expectedDoc); - } - - int docId; - DocIdSetIterator iterator = expected.iterator(); - while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (!actual.get(docId)) { - return false; - } - } - - return true; - } - - private int[] createExpectedGroupHeads(String searchTerm, GroupDoc[] groupDocs, Sort docSort, boolean sortByScoreOnly, int[] fieldIdToDocID) throws IOException { - Map> groupHeads = new HashMap>(); - for (GroupDoc groupDoc : groupDocs) { - if (!groupDoc.content.startsWith(searchTerm)) { - continue; - } - - if (!groupHeads.containsKey(groupDoc.group)) { - List list = new ArrayList(); - list.add(groupDoc); - groupHeads.put(groupDoc.group, list); - continue; - } - groupHeads.get(groupDoc.group).add(groupDoc); - } - - int[] allGroupHeads = new int[groupHeads.size()]; - int i = 0; - for (String groupValue : groupHeads.keySet()) { - List docs = groupHeads.get(groupValue); - Collections.sort(docs, getComparator(docSort, sortByScoreOnly, fieldIdToDocID)); - allGroupHeads[i++] = docs.get(0).id; - } - - return allGroupHeads; - } - - private Sort getRandomSort(boolean scoreOnly) { - final List sortFields = new ArrayList(); - if (random.nextInt(7) == 2 || scoreOnly) { - sortFields.add(SortField.FIELD_SCORE); - } else { - if (random.nextBoolean()) { - if (random.nextBoolean()) { - sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean())); - } else { - sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean())); - } - } else if (random.nextBoolean()) { - sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean())); - sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean())); - } - } - // Break ties: - if (random.nextBoolean() && !scoreOnly) { - sortFields.add(new SortField("sort3", SortField.STRING)); - } else if (!scoreOnly) { - sortFields.add(new SortField("id", SortField.INT)); - } - return new Sort(sortFields.toArray(new SortField[sortFields.size()])); - } - - private Comparator getComparator(Sort sort, final boolean sortByScoreOnly, final int[] fieldIdToDocID) { - final SortField[] sortFields = sort.getSort(); - return new Comparator() { - public int compare(GroupDoc d1, GroupDoc d2) { - for (SortField sf : sortFields) { - final int cmp; - if (sf.getType() == SortField.SCORE) { - if (d1.score > d2.score) { - cmp = -1; - } else if (d1.score < d2.score) { - cmp = 1; - } else { - cmp = sortByScoreOnly ? fieldIdToDocID[d1.id] - fieldIdToDocID[d2.id] : 0; - } - } else if (sf.getField().equals("sort1")) { - cmp = d1.sort1.compareTo(d2.sort1); - } else if (sf.getField().equals("sort2")) { - cmp = d1.sort2.compareTo(d2.sort2); - } else if (sf.getField().equals("sort3")) { - cmp = d1.sort3.compareTo(d2.sort3); - } else { - assertEquals(sf.getField(), "id"); - cmp = d1.id - d2.id; - } - if (cmp != 0) { - return sf.getReverse() ? -cmp : cmp; - } - } - // Our sort always fully tie breaks: - fail(); - return 0; - } - }; - } - - - private static class GroupDoc { - final int id; - final String group; - final String sort1; - final String sort2; - final String sort3; - // content must be "realN ..." - final String content; - float score; - - public GroupDoc(int id, String group, String sort1, String sort2, String sort3, String content) { - this.id = id; - this.group = group; - this.sort1 = sort1; - this.sort2 = sort2; - this.sort3 = sort3; - this.content = content; - } - - } - -}