1 package org.apache.lucene.facet.taxonomy;
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.io.StringWriter;
6 import java.util.ArrayList;
7 import java.util.Arrays;
9 import org.apache.lucene.store.Directory;
10 import org.apache.lucene.store.LockObtainFailedException;
11 import org.apache.lucene.store.RAMDirectory;
12 import org.junit.Ignore;
13 import org.junit.Test;
15 import org.apache.lucene.util.LuceneTestCase;
16 import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
17 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
18 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
19 import org.apache.lucene.util.SlowRAMDirectory;
22 * Licensed to the Apache Software Foundation (ASF) under one or more
23 * contributor license agreements. See the NOTICE file distributed with
24 * this work for additional information regarding copyright ownership.
25 * The ASF licenses this file to You under the Apache License, Version 2.0
26 * (the "License"); you may not use this file except in compliance with
27 * the License. You may obtain a copy of the License at
29 * http://www.apache.org/licenses/LICENSE-2.0
31 * Unless required by applicable law or agreed to in writing, software
32 * distributed under the License is distributed on an "AS IS" BASIS,
33 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34 * See the License for the specific language governing permissions and
35 * limitations under the License.
38 public class TestTaxonomyCombined extends LuceneTestCase {
40 /** The following categories will be added to the taxonomy by
41 fillTaxonomy(), and tested by all tests below:
43 private final static String[][] categories = {
44 { "Author", "Tom Clancy" },
45 { "Author", "Richard Dawkins" },
46 { "Author", "Richard Adams" },
47 { "Price", "10", "11" },
48 { "Price", "10", "12" },
49 { "Price", "20", "27" },
50 { "Date", "2006", "05" },
53 { "Subject", "Nonfiction", "Children", "Animals" },
54 { "Author", "Stephen Jay Gould" },
55 { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
58 /** When adding the above categories with TaxonomyWriter.addCategory(),
59 the following paths are expected to be returned:
60 (note that currently the full path is not returned, and therefore
61 not tested - rather, just the last component, the ordinal, is returned
64 private final static int[][] expectedPaths = {
79 /** The taxonomy index is expected to then contain the following
80 generated categories, with increasing ordinals (note how parent
81 categories are be added automatically when subcategories are added).
83 private final static String[][] expectedCategories = {
84 { }, // the root category
86 { "Author", "Tom Clancy" },
87 { "Author", "Richard Dawkins" },
88 { "Author", "Richard Adams" },
91 { "Price", "10", "11" },
92 { "Price", "10", "12" },
94 { "Price", "20", "27" },
97 { "Date", "2006", "05" },
100 { "Subject", "Nonfiction" },
101 { "Subject", "Nonfiction", "Children" },
102 { "Subject", "Nonfiction", "Children", "Animals" },
103 { "Author", "Stephen Jay Gould" },
104 { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
107 /** fillTaxonomy adds the categories in the categories[] array, and asserts
108 that the additions return exactly the ordinals (in the past - paths)
109 specified in expectedPaths[].
110 Note that this assumes that fillTaxonomy() is called on an empty taxonomy
111 index. Calling it after something else was already added to the taxonomy
112 index will surely have this method fail.
114 public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
115 for (int i = 0; i < categories.length; i++) {
116 int ordinal = tw.addCategory(new CategoryPath(categories[i]));
117 int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
118 if (ordinal!=expectedOrdinal) {
119 fail("For category "+showcat(categories[i])+" expected ordinal "+
120 expectedOrdinal+", but got "+ordinal);
125 public static String showcat(String[] path) {
129 if (path.length==0) {
132 if (path.length==1 && path[0].length()==0) {
135 StringBuilder sb = new StringBuilder(path[0]);
136 for (int i=1; i<path.length; i++) {
140 return sb.toString();
143 private String showcat(CategoryPath path) {
147 if (path.length()==0) {
150 return "<"+path.toString('/')+">";
153 /** Basic tests for TaxonomyWriter. Basically, we test that
154 IndexWriter.addCategory works, i.e. returns the expected ordinals
155 (this is tested by calling the fillTaxonomy() method above).
156 We do not test here that after writing the index can be read -
157 this will be done in more tests below.
160 public void testWriter() throws Exception {
161 Directory indexDir = newDirectory();
162 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
164 // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
165 // is what we expect it to be.
166 assertEquals(expectedCategories.length, tw.getSize());
171 /** testWriterTwice is exactly like testWriter, except that after adding
172 all the categories, we add them again, and see that we get the same
173 old ids again - not new categories.
176 public void testWriterTwice() throws Exception {
177 Directory indexDir = newDirectory();
178 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
180 // run fillTaxonomy again - this will try to add the same categories
181 // again, and check that we see the same ordinal paths again, not
184 // Let's check the number of categories again, to see that no
185 // extraneous categories were created:
186 assertEquals(expectedCategories.length, tw.getSize());
191 /** testWriterTwice2 is similar to testWriterTwice, except that the index
192 is closed and reopened before attempting to write to it the same
193 categories again. While testWriterTwice can get along with writing
194 and reading correctly just to the cache, testWriterTwice2 checks also
195 the actual disk read part of the writer:
198 public void testWriterTwice2() throws Exception {
199 Directory indexDir = newDirectory();
200 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
203 tw = new DirectoryTaxonomyWriter(indexDir);
204 // run fillTaxonomy again - this will try to add the same categories
205 // again, and check that we see the same ordinals again, not different
206 // ones, and that the number of categories hasn't grown by the new
209 assertEquals(expectedCategories.length, tw.getSize());
215 * testWriterTwice3 is yet another test which tests creating a taxonomy
216 * in two separate writing sessions. This test used to fail because of
217 * a bug involving commit(), explained below, and now should succeed.
222 public void testWriterTwice3() throws Exception {
223 Directory indexDir = newDirectory();
224 // First, create and fill the taxonomy
225 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
228 // Now, open the same taxonomy and add the same categories again.
229 // After a few categories, the LuceneTaxonomyWriter implementation
230 // will stop looking for each category on disk, and rather read them
231 // all into memory and close it's reader. The bug was that it closed
232 // the reader, but forgot that it did (because it didn't set the reader
233 // reference to null).
234 tw = new DirectoryTaxonomyWriter(indexDir);
236 // Add one new category, just to make commit() do something:
237 tw.addCategory(new CategoryPath("hi"));
238 // Do a commit(). Here was a bug - if tw had a reader open, it should
239 // be reopened after the commit. However, in our case the reader should
240 // not be open (as explained above) but because it was not set to null,
241 // we forgot that, tried to reopen it, and got an AlreadyClosedException.
243 assertEquals(expectedCategories.length+1, tw.getSize());
248 /** Another set of tests for the writer, which don't use an array and
249 * try to distill the different cases, and therefore may be more helpful
250 * for debugging a problem than testWriter() which is hard to know why
251 * or where it failed.
254 public void testWriterSimpler() throws Exception {
255 Directory indexDir = newDirectory();
256 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
257 assertEquals(1, tw.getSize()); // the root only
258 // Test that adding a new top-level category works
259 assertEquals(1, tw.addCategory(new CategoryPath("a")));
260 assertEquals(2, tw.getSize());
261 // Test that adding the same category again is noticed, and the
262 // same ordinal (and not a new one) is returned.
263 assertEquals(1, tw.addCategory(new CategoryPath("a")));
264 assertEquals(2, tw.getSize());
265 // Test that adding another top-level category returns a new ordinal,
267 assertEquals(2, tw.addCategory(new CategoryPath("b")));
268 assertEquals(3, tw.getSize());
269 // Test that adding a category inside one of the above adds just one
271 assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
272 assertEquals(4, tw.getSize());
273 // Test that adding the same second-level category doesn't do anything:
274 assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
275 assertEquals(4, tw.getSize());
276 // Test that adding a second-level category with two new components
277 // indeed adds two categories
278 assertEquals(5, tw.addCategory(new CategoryPath("d","e")));
279 assertEquals(6, tw.getSize());
280 // Verify that the parents were added above in the order we expected
281 assertEquals(4, tw.addCategory(new CategoryPath("d")));
282 // Similar, but inside a category that already exists:
283 assertEquals(7, tw.addCategory(new CategoryPath("b", "d","e")));
284 assertEquals(8, tw.getSize());
285 // And now inside two levels of categories that already exist:
286 assertEquals(8, tw.addCategory(new CategoryPath("b", "d","f")));
287 assertEquals(9, tw.getSize());
293 /** Test writing an empty index, and seeing that a reader finds in it
294 the root category, and only it. We check all the methods on that
295 root category return the expected results.
298 public void testRootOnly() throws Exception {
299 Directory indexDir = newDirectory();
300 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
301 // right after opening the index, it should already contain the
302 // root, so have size 1:
303 assertEquals(1, tw.getSize());
305 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
306 assertEquals(1, tr.getSize());
307 assertEquals(0, tr.getPath(0).length());
308 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
309 assertEquals(0, tr.getOrdinal(new CategoryPath()));
314 /** The following test is exactly the same as testRootOnly, except we
315 * do not close the writer before opening the reader. We want to see
316 * that the root is visible to the reader not only after the writer is
317 * closed, but immediately after it is created.
320 public void testRootOnly2() throws Exception {
321 Directory indexDir = newDirectory();
322 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
324 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
325 assertEquals(1, tr.getSize());
326 assertEquals(0, tr.getPath(0).length());
327 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
328 assertEquals(0, tr.getOrdinal(new CategoryPath()));
334 /** Basic tests for TaxonomyReader's category <=> ordinal transformations
335 (getSize(), getCategory() and getOrdinal()).
336 We test that after writing the index, it can be read and all the
337 categories and ordinals are there just as we expected them to be.
340 public void testReaderBasic() throws Exception {
341 Directory indexDir = newDirectory();
342 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
345 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
347 // test TaxonomyReader.getSize():
348 assertEquals(expectedCategories.length, tr.getSize());
350 // test round trips of ordinal => category => ordinal
351 for (int i=0; i<tr.getSize(); i++) {
352 assertEquals(i, tr.getOrdinal(tr.getPath(i)));
355 // test TaxonomyReader.getCategory():
356 for (int i=0; i<tr.getSize(); i++) {
357 CategoryPath expectedCategory = new CategoryPath(expectedCategories[i]);
358 CategoryPath category = tr.getPath(i);
359 if (!expectedCategory.equals(category)) {
360 fail("For ordinal "+i+" expected category "+
361 showcat(expectedCategory)+", but got "+showcat(category));
364 // (also test invalid ordinals:)
365 assertNull(tr.getPath(-1));
366 assertNull(tr.getPath(tr.getSize()));
367 assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL));
369 // test TaxonomyReader.getOrdinal():
370 for (int i=0; i<expectedCategories.length; i++) {
371 int expectedOrdinal = i;
372 int ordinal = tr.getOrdinal(new CategoryPath(expectedCategories[i]));
373 if (expectedOrdinal != ordinal) {
374 fail("For category "+showcat(expectedCategories[i])+" expected ordinal "+
375 expectedOrdinal+", but got "+ordinal);
378 // (also test invalid categories:)
379 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("non-existant")));
380 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("Author", "Jules Verne")));
386 /** Tests for TaxonomyReader's getParent() method.
387 We check it by comparing its results to those we could have gotten by
388 looking at the category string paths (where the parentage is obvious).
389 Note that after testReaderBasic(), we already know we can trust the
390 ordinal <=> category conversions.
392 Note: At the moment, the parent methods in the reader are deprecated,
393 but this does not mean they should not be tested! Until they are
394 removed (*if* they are removed), these tests should remain to see
395 that they still work correctly.
399 public void testReaderParent() throws Exception {
400 Directory indexDir = newDirectory();
401 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
404 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
406 // check that the parent of the root ordinal is the invalid ordinal:
407 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
409 // check parent of non-root ordinals:
410 for (int ordinal=1; ordinal<tr.getSize(); ordinal++) {
411 CategoryPath me = tr.getPath(ordinal);
412 int parentOrdinal = tr.getParent(ordinal);
413 CategoryPath parent = tr.getPath(parentOrdinal);
415 fail("Parent of "+ordinal+" is "+parentOrdinal+
416 ", but this is not a valid category.");
418 // verify that the parent is indeed my parent, according to the strings
419 if (!new CategoryPath(me, me.length()-1).equals(parent)) {
420 fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+
421 " but categories are "+showcat(parent)+" and "+showcat(me)+
426 // check parent of of invalid ordinals:
429 fail("getParent for -1 should throw exception");
430 } catch (ArrayIndexOutOfBoundsException e) {
434 tr.getParent(TaxonomyReader.INVALID_ORDINAL);
435 fail("getParent for INVALID_ORDINAL should throw exception");
436 } catch (ArrayIndexOutOfBoundsException e) {
440 int parent = tr.getParent(tr.getSize());
441 fail("getParent for getSize() should throw exception, but returned "+parent);
442 } catch (ArrayIndexOutOfBoundsException e) {
451 * Tests for TaxonomyWriter's getParent() method. We check it by comparing
452 * its results to those we could have gotten by looking at the category
453 * string paths using a TaxonomyReader (where the parentage is obvious).
454 * Note that after testReaderBasic(), we already know we can trust the
455 * ordinal <=> category conversions from TaxonomyReader.
457 * The difference between testWriterParent1 and testWriterParent2 is that
458 * the former closes the taxonomy writer before reopening it, while the
461 * This test code is virtually identical to that of testReaderParent().
464 public void testWriterParent1() throws Exception {
465 Directory indexDir = newDirectory();
466 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
469 tw = new DirectoryTaxonomyWriter(indexDir);
470 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
472 checkWriterParent(tr, tw);
480 public void testWriterParent2() throws Exception {
481 Directory indexDir = newDirectory();
482 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
485 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
487 checkWriterParent(tr, tw);
494 private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception {
495 // check that the parent of the root ordinal is the invalid ordinal:
496 assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0));
498 // check parent of non-root ordinals:
499 for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
500 CategoryPath me = tr.getPath(ordinal);
501 int parentOrdinal = tw.getParent(ordinal);
502 CategoryPath parent = tr.getPath(parentOrdinal);
503 if (parent == null) {
504 fail("Parent of " + ordinal + " is " + parentOrdinal
505 + ", but this is not a valid category.");
507 // verify that the parent is indeed my parent, according to the
509 if (!new CategoryPath(me, me.length() - 1).equals(parent)) {
510 fail("Got parent " + parentOrdinal + " for ordinal " + ordinal
511 + " but categories are " + showcat(parent) + " and "
512 + showcat(me) + " respectively.");
516 // check parent of of invalid ordinals:
519 fail("getParent for -1 should throw exception");
520 } catch (ArrayIndexOutOfBoundsException e) {
524 tw.getParent(TaxonomyReader.INVALID_ORDINAL);
525 fail("getParent for INVALID_ORDINAL should throw exception");
526 } catch (ArrayIndexOutOfBoundsException e) {
530 int parent = tw.getParent(tr.getSize());
531 fail("getParent for getSize() should throw exception, but returned "
533 } catch (ArrayIndexOutOfBoundsException e) {
538 /** Tests TaxonomyReader's getParentArray() method. We do not test this
539 method directly, but rather just compare its results to those from
540 other methods (which we have already tested above).
543 public void testReaderParentArray() throws Exception {
544 Directory indexDir = newDirectory();
545 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
548 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
549 int[] parents = tr.getParentArray();
550 assertEquals(tr.getSize(), parents.length);
551 for (int i=0; i<tr.getSize(); i++) {
552 assertEquals(tr.getParent(i), parents[i]);
559 * Test TaxonomyReader's child browsing method, getChildrenArrays()
560 * This only tests for correctness of the data on one example - we have
561 * below further tests on data refresh etc.
564 public void testChildrenArrays() throws Exception {
565 Directory indexDir = newDirectory();
566 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
569 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
570 ChildrenArrays ca = tr.getChildrenArrays();
571 int[] youngestChildArray = ca.getYoungestChildArray();
572 assertEquals(tr.getSize(), youngestChildArray.length);
573 int[] olderSiblingArray = ca.getOlderSiblingArray();
574 assertEquals(tr.getSize(), olderSiblingArray.length);
575 for (int i=0; i<expectedCategories.length; i++) {
576 // find expected children by looking at all expectedCategories
578 ArrayList<Integer> expectedChildren = new ArrayList<Integer>();
579 for (int j=expectedCategories.length-1; j>=0; j--) {
580 if (expectedCategories[j].length != expectedCategories[i].length+1) {
581 continue; // not longer by 1, so can't be a child
583 boolean ischild=true;
584 for (int k=0; k<expectedCategories[i].length; k++) {
585 if (!expectedCategories[j][k].equals(expectedCategories[i][k])) {
591 expectedChildren.add(j);
594 // check that children and expectedChildren are the same, with the
595 // correct reverse (youngest to oldest) order:
596 if (expectedChildren.size()==0) {
597 assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
599 int child = youngestChildArray[i];
600 assertEquals(expectedChildren.get(0).intValue(),
602 for (int j=1; j<expectedChildren.size(); j++) {
603 child = olderSiblingArray[child];
604 assertEquals(expectedChildren.get(j).intValue(),
606 // if child is INVALID_ORDINAL we should stop, but
607 // assertEquals would fail in this case anyway.
609 // When we're done comparing, olderSiblingArray should now point
610 // to INVALID_ORDINAL, saying there are no more children. If it
611 // doesn't, we found too many children...
612 assertEquals(-1, olderSiblingArray[child]);
620 * Similar to testChildrenArrays, except rather than look at
621 * expected results, we test for several "invariants" that the results
622 * should uphold, e.g., that a child of a category indeed has this category
623 * as its parent. This sort of test can more easily be extended to larger
624 * example taxonomies, because we do not need to build the expected list
625 * of categories like we did in the above test.
628 public void testChildrenArraysInvariants() throws Exception {
629 Directory indexDir = newDirectory();
630 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
633 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
634 ChildrenArrays ca = tr.getChildrenArrays();
635 int[] youngestChildArray = ca.getYoungestChildArray();
636 assertEquals(tr.getSize(), youngestChildArray.length);
637 int[] olderSiblingArray = ca.getOlderSiblingArray();
638 assertEquals(tr.getSize(), olderSiblingArray.length);
640 // test that the "youngest child" of every category is indeed a child:
641 for (int i=0; i<tr.getSize(); i++) {
642 int youngestChild = youngestChildArray[i];
643 if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
644 assertEquals(i, tr.getParent(youngestChild));
648 // test that the "older sibling" of every category is indeed older (lower)
649 // (it can also be INVALID_ORDINAL, which is lower than any ordinal)
650 for (int i=0; i<tr.getSize(); i++) {
651 assertTrue("olderSiblingArray["+i+"] should be <"+i, olderSiblingArray[i] < i);
654 // test that the "older sibling" of every category is indeed a sibling
655 // (they share the same parent)
656 for (int i=0; i<tr.getSize(); i++) {
657 int sibling = olderSiblingArray[i];
658 if (sibling == TaxonomyReader.INVALID_ORDINAL) {
661 assertEquals(tr.getParent(i), tr.getParent(sibling));
664 // And now for slightly more complex (and less "invariant-like"...)
667 // test that the "youngest child" is indeed the youngest (so we don't
668 // miss the first children in the chain)
669 for (int i=0; i<tr.getSize(); i++) {
670 // Find the really youngest child:
672 for (j=tr.getSize()-1; j>i; j--) {
673 if (tr.getParent(j)==i) {
674 break; // found youngest child
677 if (j==i) { // no child found
678 j=TaxonomyReader.INVALID_ORDINAL;
680 assertEquals(j, youngestChildArray[i]);
683 // test that the "older sibling" is indeed the least oldest one - and
684 // not a too old one or -1 (so we didn't miss some children in the
685 // middle or the end of the chain).
686 for (int i=0; i<tr.getSize(); i++) {
687 // Find the youngest older sibling:
689 for (j=i-1; j>=0; j--) {
690 if (tr.getParent(j)==tr.getParent(i)) {
691 break; // found youngest older sibling
694 if (j<0) { // no sibling found
695 j=TaxonomyReader.INVALID_ORDINAL;
697 assertEquals(j, olderSiblingArray[i]);
705 * Test how getChildrenArrays() deals with the taxonomy's growth:
708 public void testChildrenArraysGrowth() throws Exception {
709 Directory indexDir = newDirectory();
710 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
711 tw.addCategory(new CategoryPath("hi", "there"));
713 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
714 ChildrenArrays ca = tr.getChildrenArrays();
715 assertEquals(3, tr.getSize());
716 assertEquals(3, ca.getOlderSiblingArray().length);
717 assertEquals(3, ca.getYoungestChildArray().length);
718 assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.getYoungestChildArray()));
719 assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.getOlderSiblingArray()));
720 tw.addCategory(new CategoryPath("hi", "ho"));
721 tw.addCategory(new CategoryPath("hello"));
723 // Before refresh, nothing changed..
724 ChildrenArrays newca = tr.getChildrenArrays();
725 assertSame(newca, ca); // we got exactly the same object
726 assertEquals(3, tr.getSize());
727 assertEquals(3, ca.getOlderSiblingArray().length);
728 assertEquals(3, ca.getYoungestChildArray().length);
729 // After the refresh, things change:
731 ca = tr.getChildrenArrays();
732 assertEquals(5, tr.getSize());
733 assertEquals(5, ca.getOlderSiblingArray().length);
734 assertEquals(5, ca.getYoungestChildArray().length);
735 assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.getYoungestChildArray()));
736 assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.getOlderSiblingArray()));
743 * Test that getParentArrays is valid when retrieved during refresh
747 public void testTaxonomyReaderRefreshRaces() throws Exception {
748 // compute base child arrays - after first chunk, and after the other
749 Directory indexDirBase = newDirectory();
750 TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
751 twBase.addCategory(new CategoryPath("a", "0"));
752 final CategoryPath abPath = new CategoryPath("a", "b");
753 twBase.addCategory(abPath);
755 TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
757 final ChildrenArrays ca1 = trBase.getChildrenArrays();
759 final int abOrd = trBase.getOrdinal(abPath);
760 final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd];
762 for (int i=0; i < 1<<10; i++) { //1024 facets
763 twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
769 final ChildrenArrays ca2 = trBase.getChildrenArrays();
770 final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd];
772 for (int retry=0; retry<100; retry++) {
773 assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry);
775 indexDirBase.close();
778 private void assertConsistentYoungestChild(final CategoryPath abPath,
779 final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
781 SlowRAMDirectory indexDir = new SlowRAMDirectory(-1,null); // no slowness for intialization
782 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
783 tw.addCategory(new CategoryPath("a", "0"));
784 tw.addCategory(abPath);
787 final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
788 for (int i=0; i < 1<<10; i++) { //1024 facets
789 final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
791 assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
795 final boolean[] stop = new boolean[] { false };
796 final Throwable[] error = new Throwable[] { null };
797 final int retrieval[] = { 0 };
799 Thread thread = new Thread("Child Arrays Verifier") {
802 setPriority(1+getPriority());
805 int lastOrd = tr.getParentArray().length-1;
806 assertNotNull("path of last-ord "+lastOrd+" is not found!",tr.getPath(lastOrd));
807 assertChildrenArrays(tr.getChildrenArrays(),retry,retrieval[0]++);
809 } catch (Throwable e) {
815 private void assertChildrenArrays(ChildrenArrays ca, int retry, int retrieval) {
816 final int abYoungChild = ca.getYoungestChildArray()[abOrd];
818 "Retry "+retry+": retrieval: "+retrieval+": wrong youngest child for category "+abPath+" (ord="+abOrd+
819 ") - must be either "+abYoungChildBase1+" or "+abYoungChildBase2+" but was: "+abYoungChild,
820 abYoungChildBase1==abYoungChild ||
821 abYoungChildBase2==ca.getYoungestChildArray()[abOrd]);
826 indexDir.setSleepMillis(1); // some delay for refresh
831 assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
837 /** Grab the stack trace into a string since the exception was thrown in a thread and we want the assert
838 * outside the thread to show the stack trace in case of failure. */
839 private String stackTraceStr(final Throwable error) {
843 StringWriter sw = new StringWriter();
844 PrintWriter pw = new PrintWriter(sw);
845 error.printStackTrace(pw);
847 return sw.toString();
850 /** Test that if separate reader and writer objects are opened, new
851 categories written into the writer are available to a reader only
853 Note that this test obviously doesn't cover all the different
854 concurrency scenarios, all different methods, and so on. We may
855 want to write more tests of this sort.
857 This test simulates what would happen when there are two separate
858 processes, one doing indexing, and the other searching, and each opens
859 its own object (with obviously no connection between the objects) using
860 the same disk files. Note, though, that this test does not test what
861 happens when the two processes do their actual work at exactly the same
863 It also doesn't test multi-threading.
866 public void testSeparateReaderAndWriter() throws Exception {
867 Directory indexDir = newDirectory();
868 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
870 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
874 // getParent() and getSize() test:
876 tr.getParent(author);
877 fail("Initially, getParent for "+author+" should throw exception");
878 } catch (ArrayIndexOutOfBoundsException e) {
881 assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
882 tw.addCategory(new CategoryPath("Author"));
884 tr.getParent(author);
885 fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
886 } catch (ArrayIndexOutOfBoundsException e) {
889 assertEquals(1, tr.getSize()); // still root only...
890 tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
892 tr.getParent(author);
893 fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
894 } catch (ArrayIndexOutOfBoundsException e) {
897 assertEquals(1, tr.getSize()); // still root only...
900 tr.getParent(author);
901 fail("Before refresh(), getParent for "+author+" should still throw exception");
902 } catch (ArrayIndexOutOfBoundsException e) {
905 assertEquals(1, tr.getSize()); // still root only...
908 assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
910 } catch (ArrayIndexOutOfBoundsException e) {
911 fail("After category addition, commit() and refresh(), getParent for "+author+" should NOT throw exception");
913 assertEquals(2, tr.getSize()); // finally, see there are two categories
915 // now, add another category, and verify that after commit and refresh
916 // the parent of this category is correct (this requires the reader
917 // to correctly update its prefetched parent vector), and that the
918 // old information also wasn't ruined:
919 tw.addCategory(new CategoryPath("Author", "Richard Dawkins"));
923 assertEquals(author, tr.getParent(dawkins));
924 assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
925 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(TaxonomyReader.ROOT_ORDINAL));
926 assertEquals(3, tr.getSize());
933 public void testSeparateReaderAndWriter2() throws Exception {
934 Directory indexDir = newDirectory();
935 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
937 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
939 // Test getOrdinal():
940 CategoryPath author = new CategoryPath("Author");
942 assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
943 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
944 tw.addCategory(author);
945 // before commit and refresh, no change:
946 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
947 assertEquals(1, tr.getSize()); // still root only...
948 tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
949 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
950 assertEquals(1, tr.getSize()); // still root only...
952 // still not enough before refresh:
953 assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
954 assertEquals(1, tr.getSize()); // still root only...
955 tr.refresh(); // finally
956 assertEquals(1, tr.getOrdinal(author));
957 assertEquals(2, tr.getSize()); // still root only...
964 * Test what happens if we try to write to a locked taxonomy writer,
965 * and see that we can unlock it and continue.
968 public void testWriterLock() throws Exception {
969 // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
970 Directory indexDir = new RAMDirectory();
971 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
972 tw.addCategory(new CategoryPath("hi", "there"));
974 // we deliberately not close the write now, and keep it open and
976 // Verify that the writer worked:
977 TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
978 assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there")));
979 // Try to open a second writer, with the first one locking the directory.
980 // We expect to get a LockObtainFailedException.
982 new DirectoryTaxonomyWriter(indexDir);
983 fail("should have failed to write in locked directory");
984 } catch (LockObtainFailedException e) {
985 // this is what we expect to happen.
987 // Remove the lock, and now the open should succeed, and we can
988 // write to the new writer.
989 DirectoryTaxonomyWriter.unlock(indexDir);
990 TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
991 tw2.addCategory(new CategoryPath("hey"));
993 // See that the writer indeed wrote:
995 assertEquals(3, tr.getOrdinal(new CategoryPath("hey")));
1002 * fillTaxonomyCheckPaths adds the categories in the categories[] array,
1003 * and asserts that the additions return exactly paths specified in
1004 * expectedPaths[]. This is the same add fillTaxonomy() but also checks
1005 * the correctness of getParent(), not just addCategory().
1006 * Note that this assumes that fillTaxonomyCheckPaths() is called on an empty
1007 * taxonomy index. Calling it after something else was already added to the
1008 * taxonomy index will surely have this method fail.
1010 public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
1011 for (int i = 0; i < categories.length; i++) {
1012 int ordinal = tw.addCategory(new CategoryPath(categories[i]));
1013 int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
1014 if (ordinal!=expectedOrdinal) {
1015 fail("For category "+showcat(categories[i])+" expected ordinal "+
1016 expectedOrdinal+", but got "+ordinal);
1018 for (int j=expectedPaths[i].length-2; j>=0; j--) {
1019 ordinal = tw.getParent(ordinal);
1020 expectedOrdinal = expectedPaths[i][j];
1021 if (ordinal!=expectedOrdinal) {
1022 fail("For category "+showcat(categories[i])+" expected ancestor level "+
1023 (expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
1024 ", but got "+ordinal);
1030 // After fillTaxonomy returned successfully, checkPaths() checks that
1031 // the getParent() calls return as expected, from the table
1032 public static void checkPaths(TaxonomyWriter tw) throws IOException {
1033 for (int i = 0; i < categories.length; i++) {
1034 int ordinal = expectedPaths[i][expectedPaths[i].length-1];
1035 for (int j=expectedPaths[i].length-2; j>=0; j--) {
1036 ordinal = tw.getParent(ordinal);
1037 int expectedOrdinal = expectedPaths[i][j];
1038 if (ordinal!=expectedOrdinal) {
1039 fail("For category "+showcat(categories[i])+" expected ancestor level "+
1040 (expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
1041 ", but got "+ordinal);
1044 assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(expectedPaths[i][0]));
1046 assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL));
1050 * Basic test for TaxonomyWriter.getParent(). This is similar to testWriter
1051 * above, except we also check the parents of the added categories, not just
1052 * the categories themselves.
1055 public void testWriterCheckPaths() throws Exception {
1056 Directory indexDir = newDirectory();
1057 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
1058 fillTaxonomyCheckPaths(tw);
1059 // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
1060 // is what we expect it to be.
1061 assertEquals(expectedCategories.length, tw.getSize());
1067 * testWriterCheckPaths2 is the path-checking variant of testWriterTwice
1068 * and testWriterTwice2. After adding all the categories, we add them again,
1069 * and see that we get the same old ids and paths. We repeat the path checking
1070 * yet again after closing and opening the index for writing again - to see
1071 * that the reading of existing data from disk works as well.
1074 public void testWriterCheckPaths2() throws Exception {
1075 Directory indexDir = newDirectory();
1076 TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
1083 tw = new DirectoryTaxonomyWriter(indexDir);
1091 // TODO (Facet): test multiple readers, one writer. Have the multiple readers
1092 // using the same object (simulating threads) or different objects
1093 // (simulating processes).