+++ /dev/null
-package org.apache.lucene.facet.taxonomy;
-
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.RAMDirectory;
-import org.junit.Ignore;
-import org.junit.Test;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
-import org.apache.lucene.util.SlowRAMDirectory;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class TestTaxonomyCombined extends LuceneTestCase {
-
- /**
-  * Category paths that fillTaxonomy() adds to the taxonomy, in this order.
-  * Each inner array is one path, listed from its top-level component down
-  * to its last component. Several entries share prefixes (e.g. "Author"),
-  * one entry ("Date", "2006") is a prefix of an earlier, longer entry, and
-  * the final entry consists of non-ASCII characters written as Unicode
-  * escapes.
-  */
- private final static String[][] categories = {
- { "Author", "Tom Clancy" },
- { "Author", "Richard Dawkins" },
- { "Author", "Richard Adams" },
- { "Price", "10", "11" },
- { "Price", "10", "12" },
- { "Price", "20", "27" },
- { "Date", "2006", "05" },
- { "Date", "2005" },
- { "Date", "2006" },
- { "Subject", "Nonfiction", "Children", "Animals" },
- { "Author", "Stephen Jay Gould" },
- { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
- };
-
- /**
-  * Ordinals expected for the paths in categories[]: row i belongs to
-  * categories[i] and lists the expected ordinal of each prefix of that
-  * path, ending with the ordinal of the full path.
-  * Note that fillTaxonomy() only compares the last element of each row with
-  * the ordinal returned by TaxonomyWriter.addCategory(); the full path of
-  * ordinals is not returned by the writer and is therefore not tested there.
-  */
- private final static int[][] expectedPaths = {
- { 1, 2 },
- { 1, 3 },
- { 1, 4 },
- { 5, 6, 7 },
- { 5, 6, 8 },
- { 5, 9, 10 },
- { 11, 12, 13 },
- { 11, 14 },
- { 11, 12 },
- { 15, 16, 17, 18 },
- { 1, 19 },
- { 1, 20 }
- };
-
- /**
-  * Categories the taxonomy is expected to contain after fillTaxonomy(),
-  * listed by increasing ordinal: entry i is the path expected for ordinal i,
-  * and entry 0 is the root (the empty path). Note how parent categories are
-  * added automatically when subcategories are added, so in this list every
-  * parent appears before its children.
-  */
- private final static String[][] expectedCategories = {
- { }, // the root category
- { "Author" },
- { "Author", "Tom Clancy" },
- { "Author", "Richard Dawkins" },
- { "Author", "Richard Adams" },
- { "Price" },
- { "Price", "10" },
- { "Price", "10", "11" },
- { "Price", "10", "12" },
- { "Price", "20" },
- { "Price", "20", "27" },
- { "Date" },
- { "Date", "2006" },
- { "Date", "2006", "05" },
- { "Date", "2005" },
- { "Subject" },
- { "Subject", "Nonfiction" },
- { "Subject", "Nonfiction", "Children" },
- { "Subject", "Nonfiction", "Children", "Animals" },
- { "Author", "Stephen Jay Gould" },
- { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
- };
-
- /**
-  * Adds every path of categories[] to the given writer, in array order, and
-  * fails as soon as an addition returns an ordinal different from the last
-  * element of the matching expectedPaths[] row.
-  * <p>
-  * The expected ordinals assume the taxonomy was empty when the categories
-  * were first added. Calling this after something else was added to the
-  * taxonomy is expected to make the check fail.
-  *
-  * @param tw the taxonomy writer to add the categories to
-  * @throws IOException if the writer fails while adding a category
-  */
- public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
-   int row = 0;
-   for (String[] path : categories) {
-     int actual = tw.addCategory(new CategoryPath(path));
-     int[] expectedRow = expectedPaths[row];
-     int wanted = expectedRow[expectedRow.length - 1];
-     if (actual != wanted) {
-       fail("For category "+showcat(path)+" expected ordinal "+
-           wanted+", but got "+actual);
-     }
-     row++;
-   }
- }
-
- public static String showcat(String[] path) {
- if (path==null) {
- return "<null>";
- }
- if (path.length==0) {
- return "<empty>";
- }
- if (path.length==1 && path[0].length()==0) {
- return "<\"\">";
- }
- StringBuilder sb = new StringBuilder(path[0]);
- for (int i=1; i<path.length; i++) {
- sb.append('/');
- sb.append(path[i]);
- }
- return sb.toString();
- }
-
- /**
-  * Renders a CategoryPath for use in failure messages: "&lt;null&gt;" for
-  * null, "&lt;empty&gt;" for a path of length 0, otherwise the path's
-  * '/'-delimited string form wrapped in angle brackets.
-  */
- private String showcat(CategoryPath path) {
-   if (path == null) {
-     return "<null>";
-   }
-   return path.length() == 0 ? "<empty>" : "<" + path.toString('/') + ">";
- }
-
- /** Basic tests for TaxonomyWriter. Basically, we test that
- IndexWriter.addCategory works, i.e. returns the expected ordinals
- (this is tested by calling the fillTaxonomy() method above).
- We do not test here that after writing the index can be read -
- this will be done in more tests below.
- */
- @Test
- public void testWriter() throws Exception {
- Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- fillTaxonomy(tw);
- // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
- // is what we expect it to be.
- assertEquals(expectedCategories.length, tw.getSize());
- tw.close();
- indexDir.close();
- }
-
- /**
-  * Same as testWriter(), but the categories are added a second time through
-  * the same writer. The second pass must return the ordinals that were
-  * already assigned, and the taxonomy must not grow.
-  */
- @Test
- public void testWriterTwice() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   // The first pass creates the categories; the second pass re-adds them
-   // and fillTaxonomy() verifies that the very same ordinals come back.
-   for (int pass = 0; pass < 2; pass++) {
-     fillTaxonomy(writer);
-   }
-   // No extraneous categories may have been created by the second pass.
-   assertEquals(expectedCategories.length, writer.getSize());
-   writer.close();
-   dir.close();
- }
-
- /**
-  * Variant of testWriterTwice() in which the writer is closed and a new one
-  * is opened on the same directory before the categories are added again.
-  * The second writer therefore has to recognise categories written by the
-  * first one rather than ones it added itself.
-  */
- @Test
- public void testWriterTwice2() throws Exception {
-   Directory dir = newDirectory();
-
-   // First session: create the taxonomy.
-   TaxonomyWriter firstSession = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(firstSession);
-   firstSession.close();
-
-   // Second session: re-adding the same categories must yield the same
-   // ordinals (checked by fillTaxonomy()) and must not enlarge the taxonomy.
-   TaxonomyWriter secondSession = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(secondSession);
-   assertEquals(expectedCategories.length, secondSession.getSize());
-   secondSession.close();
-
-   dir.close();
- }
-
- /**
- * testWriterTwice3 is yet another test which tests creating a taxonomy
- * in two separate writing sessions. This test used to fail because of
- * a bug involving commit(), explained below, and now should succeed.
- *
- * @throws Exception
- */
- @Test
- public void testWriterTwice3() throws Exception {
- Directory indexDir = newDirectory();
- // First, create and fill the taxonomy
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- fillTaxonomy(tw);
- tw.close();
- // Now, open the same taxonomy and add the same categories again.
- // After a few categories, the LuceneTaxonomyWriter implementation
- // will stop looking for each category on disk, and rather read them
- // all into memory and close it's reader. The bug was that it closed
- // the reader, but forgot that it did (because it didn't set the reader
- // reference to null).
- tw = new LuceneTaxonomyWriter(indexDir);
- fillTaxonomy(tw);
- // Add one new category, just to make commit() do something:
- tw.addCategory(new CategoryPath("hi"));
- // Do a commit(). Here was a bug - if tw had a reader open, it should
- // be reopened after the commit. However, in our case the reader should
- // not be open (as explained above) but because it was not set to null,
- // we forgot that, tried to reopen it, and got an AlreadyClosedException.
- tw.commit();
- assertEquals(expectedCategories.length+1, tw.getSize());
- tw.close();
- indexDir.close();
- }
-
- /**
-  * Step-by-step writer test that does not rely on the shared category
-  * arrays. Each step isolates one case (new top-level category, repeated
-  * category, implicitly created parents, ...), which makes a failure easier
-  * to pinpoint than in testWriter().
-  */
- @Test
- public void testWriterSimpler() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-
-   // Only the root exists initially.
-   assertEquals(1, writer.getSize());
-
-   // A new top-level category gets the next ordinal.
-   assertEquals(1, writer.addCategory(new CategoryPath("a")));
-   assertEquals(2, writer.getSize());
-
-   // Re-adding it returns the existing ordinal and adds nothing.
-   assertEquals(1, writer.addCategory(new CategoryPath("a")));
-   assertEquals(2, writer.getSize());
-
-   // A different top-level category gets a fresh ordinal.
-   assertEquals(2, writer.addCategory(new CategoryPath("b")));
-   assertEquals(3, writer.getSize());
-
-   // A child of an existing category adds exactly one ordinal.
-   assertEquals(3, writer.addCategory(new CategoryPath("a","c")));
-   assertEquals(4, writer.getSize());
-
-   // Re-adding that second-level category changes nothing.
-   assertEquals(3, writer.addCategory(new CategoryPath("a","c")));
-   assertEquals(4, writer.getSize());
-
-   // A two-component path with two unknown components adds two categories.
-   assertEquals(5, writer.addCategory(new CategoryPath("d","e")));
-   assertEquals(6, writer.getSize());
-
-   // The implicitly created parent received the lower of the two ordinals.
-   assertEquals(4, writer.addCategory(new CategoryPath("d")));
-
-   // Two unknown components below an existing top-level category.
-   assertEquals(7, writer.addCategory(new CategoryPath("b", "d","e")));
-   assertEquals(8, writer.getSize());
-
-   // One unknown component below two existing levels.
-   assertEquals(8, writer.addCategory(new CategoryPath("b", "d","f")));
-   assertEquals(9, writer.getSize());
-
-   writer.close();
-   dir.close();
- }
-
- /**
-  * Writes an empty taxonomy, closes the writer and checks that a reader
-  * sees exactly one category, the root, and that the reader's methods
-  * return the expected values for it.
-  */
- @Test
- public void testRootOnly() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   // A freshly created taxonomy already holds the root, hence size 1.
-   assertEquals(1, writer.getSize());
-   writer.close();
-
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-   assertEquals(1, reader.getSize());
-   CategoryPath rootPath = reader.getPath(0);
-   assertEquals(0, rootPath.length());
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getParent(0));
-   assertEquals(0, reader.getOrdinal(new CategoryPath()));
-   reader.close();
-
-   dir.close();
- }
-
- /**
-  * Like testRootOnly(), except that the writer is only committed, not
-  * closed, before the reader is opened. The root must be visible to a
-  * reader right after the taxonomy is created, not only once the writer
-  * has been closed.
-  */
- @Test
- public void testRootOnly2() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   writer.commit();
-
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-   assertEquals(1, reader.getSize());
-   CategoryPath rootPath = reader.getPath(0);
-   assertEquals(0, rootPath.length());
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getParent(0));
-   assertEquals(0, reader.getOrdinal(new CategoryPath()));
-
-   writer.close();
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Basic test of TaxonomyReader's category &lt;=&gt; ordinal mapping
-  * (getSize(), getPath() and getOrdinal()): after the taxonomy has been
-  * written, every expected category must be found under its expected
-  * ordinal, and invalid ordinals / unknown categories must be reported
-  * as such.
-  */
- @Test
- public void testReaderBasic() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(writer);
-   writer.close();
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-
-   // getSize()
-   assertEquals(expectedCategories.length, reader.getSize());
-
-   // ordinal => category => ordinal must round-trip
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     CategoryPath path = reader.getPath(ord);
-     assertEquals(ord, reader.getOrdinal(path));
-   }
-
-   // getPath() must return the expected category for each ordinal
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     CategoryPath wanted = new CategoryPath(expectedCategories[ord]);
-     CategoryPath actual = reader.getPath(ord);
-     if (wanted.equals(actual)) {
-       continue;
-     }
-     fail("For ordinal "+ord+" expected category "+
-         showcat(wanted)+", but got "+showcat(actual));
-   }
-   // ... and null for ordinals outside the taxonomy
-   assertNull(reader.getPath(-1));
-   assertNull(reader.getPath(reader.getSize()));
-   assertNull(reader.getPath(TaxonomyReader.INVALID_ORDINAL));
-
-   // getOrdinal() must return the expected ordinal for each category
-   for (int ord = 0; ord < expectedCategories.length; ord++) {
-     int actual = reader.getOrdinal(new CategoryPath(expectedCategories[ord]));
-     if (ord != actual) {
-       fail("For category "+showcat(expectedCategories[ord])+" expected ordinal "+
-           ord+", but got "+actual);
-     }
-   }
-   // ... and INVALID_ORDINAL for categories that were never added
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(new CategoryPath("non-existant")));
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getOrdinal(new CategoryPath("Author", "Jules Verne")));
-
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Tests TaxonomyReader.getParent() by comparing its result with the
-  * parent that follows from the category's path (the path minus its last
-  * component). The ordinal &lt;=&gt; category conversions used for the
-  * comparison are covered by testReaderBasic().
-  * <p>
-  * Original author's note: the reader's parent methods are deprecated at
-  * the moment, but they should stay tested for as long as they exist.
-  */
- @Test
- public void testReaderParent() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(writer);
-   writer.close();
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-
-   // The root has no parent.
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getParent(0));
-
-   // Every other ordinal: the reported parent must be the path prefix.
-   for (int ord = 1; ord < reader.getSize(); ord++) {
-     CategoryPath child = reader.getPath(ord);
-     int parentOrd = reader.getParent(ord);
-     CategoryPath parent = reader.getPath(parentOrd);
-     if (parent == null) {
-       fail("Parent of "+ord+" is "+parentOrd+
-           ", but this is not a valid category.");
-     }
-     CategoryPath parentByPath = new CategoryPath(child, child.length()-1);
-     if (!parentByPath.equals(parent)) {
-       fail("Got parent "+parentOrd+" for ordinal "+ord+
-           " but categories are "+showcat(parent)+" and "+showcat(child)+
-           " respectively.");
-     }
-   }
-
-   // Ordinals outside the taxonomy must be rejected with an exception.
-   try {
-     reader.getParent(-1);
-     fail("getParent for -1 should throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   try {
-     reader.getParent(TaxonomyReader.INVALID_ORDINAL);
-     fail("getParent for INVALID_ORDINAL should throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   try {
-     int bogusParent = reader.getParent(reader.getSize());
-     fail("getParent for getSize() should throw exception, but returned "+bogusParent);
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Tests TaxonomyWriter.getParent() against the parentage that follows
-  * from the category paths, which are obtained through a TaxonomyReader
-  * (whose ordinal &lt;=&gt; category conversions are covered by
-  * testReaderBasic()). The actual checks live in checkWriterParent().
-  * <p>
-  * In this variant the writer that filled the taxonomy is closed and a new
-  * writer is opened on the same directory; testWriterParent2() keeps using
-  * the original writer instead.
-  */
- @Test
- public void testWriterParent1() throws Exception {
-   Directory dir = newDirectory();
-
-   TaxonomyWriter filler = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(filler);
-   filler.close();
-
-   TaxonomyWriter reopened = new LuceneTaxonomyWriter(dir);
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-
-   checkWriterParent(reader, reopened);
-
-   reopened.close();
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Like testWriterParent1(), but the writer that filled the taxonomy is
-  * only committed and then checked directly, without being closed and
-  * reopened.
-  */
- @Test
- public void testWriterParent2() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(writer);
-   writer.commit();
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-
-   checkWriterParent(reader, writer);
-
-   writer.close();
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Shared checks for the testWriterParent* tests: the writer's getParent()
-  * must agree with the parentage implied by the paths that the reader
-  * returns, and must throw for ordinals outside the taxonomy.
-  *
-  * @param tr reader used to translate ordinals into category paths
-  * @param tw writer whose getParent() is under test
-  */
- private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception {
-   // The root has no parent.
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0));
-
-   // Every other ordinal: the reported parent must be the path prefix.
-   for (int ord = 1; ord < tr.getSize(); ord++) {
-     CategoryPath child = tr.getPath(ord);
-     int parentOrd = tw.getParent(ord);
-     CategoryPath parent = tr.getPath(parentOrd);
-     if (parent == null) {
-       fail("Parent of " + ord + " is " + parentOrd
-           + ", but this is not a valid category.");
-     }
-     CategoryPath parentByPath = new CategoryPath(child, child.length() - 1);
-     if (!parentByPath.equals(parent)) {
-       fail("Got parent " + parentOrd + " for ordinal " + ord
-           + " but categories are " + showcat(parent) + " and "
-           + showcat(child) + " respectively.");
-     }
-   }
-
-   // Ordinals outside the taxonomy must be rejected with an exception.
-   try {
-     tw.getParent(-1);
-     fail("getParent for -1 should throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   try {
-     tw.getParent(TaxonomyReader.INVALID_ORDINAL);
-     fail("getParent for INVALID_ORDINAL should throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   try {
-     int bogusParent = tw.getParent(tr.getSize());
-     fail("getParent for getSize() should throw exception, but returned "
-         + bogusParent);
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
- }
-
- /**
-  * Tests TaxonomyReader.getParentArray() indirectly: the array must have
-  * one entry per category, and each entry must equal what getParent()
-  * returns for that ordinal (getParent() itself is tested separately).
-  */
- @Test
- public void testReaderParentArray() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(writer);
-   writer.close();
-
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-   int[] parentArray = reader.getParentArray();
-   assertEquals(reader.getSize(), parentArray.length);
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     int parentViaMethod = reader.getParent(ord);
-     assertEquals(parentViaMethod, parentArray[ord]);
-   }
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Tests TaxonomyReader's child-browsing data, getChildrenArrays(), for
-  * correctness on the example taxonomy. Refresh-related behaviour is
-  * covered by other tests.
-  */
- @Test
- public void testChildrenArrays() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(writer);
-   writer.close();
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-   ChildrenArrays arrays = reader.getChildrenArrays();
-   int[] youngest = arrays.getYoungestChildArray();
-   assertEquals(reader.getSize(), youngest.length);
-   int[] older = arrays.getOlderSiblingArray();
-   assertEquals(reader.getSize(), older.length);
-   for (int parentOrd = 0; parentOrd < expectedCategories.length; parentOrd++) {
-     String[] parentPath = expectedCategories[parentOrd];
-     // Collect the expected children from expectedCategories, scanning from
-     // the highest ordinal down so the list runs youngest to oldest.
-     ArrayList<Integer> expectedChildren = new ArrayList<Integer>();
-     for (int cand = expectedCategories.length-1; cand >= 0; cand--) {
-       String[] candPath = expectedCategories[cand];
-       if (candPath.length != parentPath.length+1) {
-         continue; // a child is exactly one component longer
-       }
-       boolean sharesPrefix = true;
-       for (int k = 0; k < parentPath.length && sharesPrefix; k++) {
-         sharesPrefix = candPath[k].equals(parentPath[k]);
-       }
-       if (sharesPrefix) {
-         expectedChildren.add(cand);
-       }
-     }
-     // Walk the youngest-child / older-sibling chain and compare it with
-     // the expected children, in the same youngest-to-oldest order.
-     if (expectedChildren.size() == 0) {
-       assertEquals(TaxonomyReader.INVALID_ORDINAL, youngest[parentOrd]);
-       continue;
-     }
-     int child = youngest[parentOrd];
-     assertEquals(expectedChildren.get(0).intValue(), child);
-     for (int idx = 1; idx < expectedChildren.size(); idx++) {
-       child = older[child];
-       // Should the chain end early, this comparison is what fails.
-       assertEquals(expectedChildren.get(idx).intValue(), child);
-     }
-     // After the last expected child the chain must end; otherwise the
-     // arrays contain more children than expected.
-     assertEquals(-1, older[child]);
-   }
-   reader.close();
-   dir.close();
- }
-
- /**
-  * Like testChildrenArrays(), but instead of comparing against a list of
-  * expected results it checks invariants that the children arrays must
-  * satisfy, e.g. that a category's youngest child really has that category
-  * as its parent. Such checks need no hand-built expectations and so carry
-  * over to larger example taxonomies.
-  */
- @Test
- public void testChildrenArraysInvariants() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   fillTaxonomy(writer);
-   writer.close();
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-   ChildrenArrays arrays = reader.getChildrenArrays();
-   int[] youngest = arrays.getYoungestChildArray();
-   assertEquals(reader.getSize(), youngest.length);
-   int[] older = arrays.getOlderSiblingArray();
-   assertEquals(reader.getSize(), older.length);
-
-   // Invariant: a recorded youngest child is a child of that category.
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     int child = youngest[ord];
-     if (child == TaxonomyReader.INVALID_ORDINAL) {
-       continue;
-     }
-     assertEquals(ord, reader.getParent(child));
-   }
-
-   // Invariant: an older sibling has a lower ordinal (INVALID_ORDINAL
-   // counts as lower than any ordinal).
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     assertTrue("olderSiblingArray["+ord+"] should be <"+ord, older[ord] < ord);
-   }
-
-   // Invariant: an older sibling shares the category's parent.
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     int sibling = older[ord];
-     if (sibling != TaxonomyReader.INVALID_ORDINAL) {
-       assertEquals(reader.getParent(ord), reader.getParent(sibling));
-     }
-   }
-
-   // The remaining checks are more involved than plain invariants.
-
-   // The recorded youngest child must be the highest-ordinal child, so that
-   // the chain does not skip its first elements.
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     int found = reader.getSize()-1;
-     while (found > ord && reader.getParent(found) != ord) {
-       found--;
-     }
-     if (found == ord) { // scanned down to ord without finding a child
-       found = TaxonomyReader.INVALID_ORDINAL;
-     }
-     assertEquals(found, youngest[ord]);
-   }
-
-   // The recorded older sibling must be the nearest lower-ordinal sibling,
-   // so that no child is skipped in the middle or at the end of the chain.
-   for (int ord = 0; ord < reader.getSize(); ord++) {
-     int found = ord-1;
-     while (found >= 0 && reader.getParent(found) != reader.getParent(ord)) {
-       found--;
-     }
-     if (found < 0) { // no sibling below ord
-       found = TaxonomyReader.INVALID_ORDINAL;
-     }
-     assertEquals(found, older[ord]);
-   }
-
-   reader.close();
-   dir.close();
- }
-
- /**
- * Test how getChildrenArrays() deals with the taxonomy's growth:
- */
- @Test
- public void testChildrenArraysGrowth() throws Exception {
- Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- tw.addCategory(new CategoryPath("hi", "there"));
- tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
- ChildrenArrays ca = tr.getChildrenArrays();
- assertEquals(3, tr.getSize());
- assertEquals(3, ca.getOlderSiblingArray().length);
- assertEquals(3, ca.getYoungestChildArray().length);
- assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.getYoungestChildArray()));
- assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.getOlderSiblingArray()));
- tw.addCategory(new CategoryPath("hi", "ho"));
- tw.addCategory(new CategoryPath("hello"));
- tw.commit();
- // Before refresh, nothing changed..
- ChildrenArrays newca = tr.getChildrenArrays();
- assertSame(newca, ca); // we got exactly the same object
- assertEquals(3, tr.getSize());
- assertEquals(3, ca.getOlderSiblingArray().length);
- assertEquals(3, ca.getYoungestChildArray().length);
- // After the refresh, things change:
- tr.refresh();
- ca = tr.getChildrenArrays();
- assertEquals(5, tr.getSize());
- assertEquals(5, ca.getOlderSiblingArray().length);
- assertEquals(5, ca.getYoungestChildArray().length);
- assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.getYoungestChildArray()));
- assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.getOlderSiblingArray()));
- tw.close();
- tr.close();
- indexDir.close();
- }
-
- /**
-  * Tests that the children arrays handed out by a TaxonomyReader stay
-  * consistent while the reader is being refreshed on another thread.
-  * <p>
-  * A baseline taxonomy is built in two chunks to learn the two legitimate
-  * values of the youngest child of "a/b": the value after the first chunk
-  * and the value after the second chunk. assertConsistentYoungestChild()
-  * then repeats the scenario 100 times with a concurrent verifier thread,
-  * which must only ever observe one of those two values.
-  * <p>
-  * Currently disabled via @Ignore.
-  */
- @Test
- @Ignore
- public void testTaxonomyReaderRefreshRaces() throws Exception {
-   // compute base child arrays - after first chunk, and after the other
-   Directory indexDirBase = newDirectory();
-   TaxonomyWriter twBase = new LuceneTaxonomyWriter(indexDirBase);
-   twBase.addCategory(new CategoryPath("a", "0"));
-   final CategoryPath abPath = new CategoryPath("a", "b");
-   twBase.addCategory(abPath);
-   twBase.commit();
-   TaxonomyReader trBase = new LuceneTaxonomyReader(indexDirBase);
-
-   final ChildrenArrays ca1 = trBase.getChildrenArrays();
-
-   final int abOrd = trBase.getOrdinal(abPath);
-   final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd];
-
-   for (int i=0; i < 1<<10; i++) { //1024 facets
-     twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
-   }
-   twBase.commit();
-
-   trBase.refresh();
-
-   final ChildrenArrays ca2 = trBase.getChildrenArrays();
-   final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd];
-
-   for (int retry=0; retry<100; retry++) {
-     assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry);
-   }
-
-   // Release the baseline writer and reader before the directory, as every
-   // other test in this class does; previously both were left open.
-   twBase.close();
-   trBase.close();
-   indexDirBase.close();
- }
-
- private void assertConsistentYoungestChild(final CategoryPath abPath,
- final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
- throws Exception {
- SlowRAMDirectory indexDir = new SlowRAMDirectory(-1,null); // no slowness for intialization
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- tw.addCategory(new CategoryPath("a", "0"));
- tw.addCategory(abPath);
- tw.commit();
-
- final TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
- for (int i=0; i < 1<<10; i++) { //1024 facets
- final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
- tw.addCategory(cp);
- assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
- }
- tw.commit();
-
- final boolean[] stop = new boolean[] { false };
- final Throwable[] error = new Throwable[] { null };
- final int retrieval[] = { 0 };
-
- Thread thread = new Thread("Child Arrays Verifier") {
- @Override
- public void run() {
- setPriority(1+getPriority());
- try {
- while (!stop[0]) {
- int lastOrd = tr.getParentArray().length-1;
- assertNotNull("path of last-ord "+lastOrd+" is not found!",tr.getPath(lastOrd));
- assertChildrenArrays(tr.getChildrenArrays(),retry,retrieval[0]++);
- }
- } catch (Throwable e) {
- error[0] = e;
- stop[0] = true;
- }
- }
-
- private void assertChildrenArrays(ChildrenArrays ca, int retry, int retrieval) {
- final int abYoungChild = ca.getYoungestChildArray()[abOrd];
- assertTrue(
- "Retry "+retry+": retrieval: "+retrieval+": wrong youngest child for category "+abPath+" (ord="+abOrd+
- ") - must be either "+abYoungChildBase1+" or "+abYoungChildBase2+" but was: "+abYoungChild,
- abYoungChildBase1==abYoungChild ||
- abYoungChildBase2==ca.getYoungestChildArray()[abOrd]);
- }
- };
- thread.start();
-
- indexDir.setSleepMillis(1); // some delay for refresh
- tr.refresh();
-
- stop[0] = true;
- thread.join();
- assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
-
- tw.close();
- tr.close();
- }
-
- /** Grab the stack trace into a string since the exception was thrown in a thread and we want the assert
- * outside the thread to show the stack trace in case of failure. */
- private String stackTraceStr(final Throwable error) {
- if (error == null) {
- return "";
- }
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw);
- error.printStackTrace(pw);
- pw.close();
- return sw.toString();
- }
-
- /**
-  * Tests that, with separately opened reader and writer objects,
-  * categories added through the writer become visible to the reader only
-  * after the writer's commit() followed by the reader's refresh().
-  * <p>
-  * This simulates two independent processes - one indexing, one searching
-  * - that each open their own object over the same files. It does not
-  * cover both sides working at exactly the same time, nor multi-threading,
-  * nor every method and concurrency scenario; more tests of this kind may
-  * be worthwhile.
-  */
- @Test
- public void testSeparateReaderAndWriter() throws Exception {
-   Directory dir = newDirectory();
-   TaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
-   writer.commit();
-   TaxonomyReader reader = new LuceneTaxonomyReader(dir);
-
-   // Ordinal that "Author" is expected to receive once it is added.
-   int authorOrd = 1;
-
-   // Initially the reader knows the root only.
-   try {
-     reader.getParent(authorOrd);
-     fail("Initially, getParent for "+authorOrd+" should throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   assertEquals(1, reader.getSize()); // an empty taxonomy has size 1 (the root)
-
-   // Added but neither committed nor refreshed: still invisible.
-   writer.addCategory(new CategoryPath("Author"));
-   try {
-     reader.getParent(authorOrd);
-     fail("Before commit() and refresh(), getParent for "+authorOrd+" should still throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   assertEquals(1, reader.getSize()); // root only
-
-   // refresh() alone does not help while the writer has not committed.
-   reader.refresh();
-   try {
-     reader.getParent(authorOrd);
-     fail("Before commit() and refresh(), getParent for "+authorOrd+" should still throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   assertEquals(1, reader.getSize()); // root only
-
-   // commit() alone does not help either while the reader is not refreshed.
-   writer.commit();
-   try {
-     reader.getParent(authorOrd);
-     fail("Before refresh(), getParent for "+authorOrd+" should still throw exception");
-   } catch (ArrayIndexOutOfBoundsException expected) {
-     // ok
-   }
-   assertEquals(1, reader.getSize()); // root only
-
-   // commit() followed by refresh(): the category is finally visible.
-   reader.refresh();
-   try {
-     assertEquals(TaxonomyReader.ROOT_ORDINAL, reader.getParent(authorOrd));
-     // ok
-   } catch (ArrayIndexOutOfBoundsException unexpected) {
-     fail("After category addition, commit() and refresh(), getParent for "+authorOrd+" should NOT throw exception");
-   }
-   assertEquals(2, reader.getSize()); // root and "Author"
-
-   // Add one more category and verify that, after commit and refresh, its
-   // parent is correct (the reader has to update its prefetched parent
-   // data for that) and the older information is still intact.
-   writer.addCategory(new CategoryPath("Author", "Richard Dawkins"));
-   int dawkinsOrd = 2;
-   writer.commit();
-   reader.refresh();
-   assertEquals(authorOrd, reader.getParent(dawkinsOrd));
-   assertEquals(TaxonomyReader.ROOT_ORDINAL, reader.getParent(authorOrd));
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, reader.getParent(TaxonomyReader.ROOT_ORDINAL));
-   assertEquals(3, reader.getSize());
-
-   writer.close();
-   reader.close();
-   dir.close();
- }
-
- @Test
- public void testSeparateReaderAndWriter2() throws Exception {
- Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
-
- /* Test getOrdinal(): goes through the same addCategory() / refresh() /
- commit() / refresh() sequence as testSeparateReaderAndWriter(), but
- checks the reader's getOrdinal() (together with getSize()) rather
- than getParent(). Until the writer's commit() has been followed by
- the reader's refresh(), the category must stay unknown to the reader. */
- CategoryPath author = new CategoryPath("Author");
-
- assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
- assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
- tw.addCategory(author);
- // before commit and refresh, no change:
- assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
- assertEquals(1, tr.getSize()); // still root only...
- tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
- assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
- assertEquals(1, tr.getSize()); // still root only...
- tw.commit();
- // still not enough before refresh:
- assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
- assertEquals(1, tr.getSize()); // still root only...
- tr.refresh(); // finally
- assertEquals(1, tr.getOrdinal(author));
- assertEquals(2, tr.getSize()); // now two categories: the root plus "Author"
- tw.close();
- tr.close();
- indexDir.close();
- }
-
- /**
- * Test what happens if we try to open a second taxonomy writer on a
- * directory that is still locked by a first, open writer: the open must
- * fail with a LockObtainFailedException. Then see that we can unlock the
- * directory, open a new writer, and continue writing through it.
- */
- @Test
- public void testWriterLock() throws Exception {
- // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
- Directory indexDir = new RAMDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- tw.addCategory(new CategoryPath("hi", "there"));
- tw.commit();
- // we deliberately do not close the writer now, and keep it open and
- // holding the lock.
- // Verify that the writer worked:
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
- assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there"))); // presumably the root and "hi" take the two ordinals before it
- // Try to open a second writer, with the first one locking the directory.
- // We expect to get a LockObtainFailedException.
- try {
- new LuceneTaxonomyWriter(indexDir);
- fail("should have failed to write in locked directory");
- } catch (LockObtainFailedException e) {
- // this is what we expect to happen.
- }
- // Remove the lock, and now the open should succeed, and we can
- // write to the new writer.
- LuceneTaxonomyWriter.unlock(indexDir);
- TaxonomyWriter tw2 = new LuceneTaxonomyWriter(indexDir);
- tw2.addCategory(new CategoryPath("hey"));
- tw2.close();
- // See that the writer indeed wrote:
- tr.refresh();
- assertEquals(3, tr.getOrdinal(new CategoryPath("hey"))); // the ordinal right after "hi/there"
- tr.close();
- tw.close(); // the first writer, whose lock was removed by unlock() above
- indexDir.close();
- }
-
- /**
-  * Populates the taxonomy from the categories[] table, like fillTaxonomy(),
-  * while additionally validating getParent(): for every category added,
-  * the ordinal returned by addCategory() and each ancestor ordinal obtained
-  * by repeatedly calling getParent() must match the corresponding row of
-  * expectedPaths[].
-  * The expected ordinals only hold when this method is called on an empty
-  * taxonomy index; calling it after something else was already added to
-  * the taxonomy index will surely make it fail.
-  */
- public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
-   for (int cat = 0; cat < categories.length; cat++) {
-     int actual = tw.addCategory(new CategoryPath(categories[cat]));
-     // the last entry of the expected row is the category's own ordinal
-     int level = expectedPaths[cat].length - 1;
-     int wanted = expectedPaths[cat][level];
-     if (actual != wanted) {
-       fail("For category "+showcat(categories[cat])+" expected ordinal "+
-           wanted+", but got "+actual);
-     }
-     // climb one ancestor at a time, comparing against the earlier row entries
-     while (--level >= 0) {
-       actual = tw.getParent(actual);
-       wanted = expectedPaths[cat][level];
-       if (actual != wanted) {
-         fail("For category "+showcat(categories[cat])+" expected ancestor level "+
-             (expectedPaths[cat].length-1-level)+" was "+wanted+
-             ", but got "+actual);
-       }
-     }
-   }
- }
-
- /**
-  * To be called after fillTaxonomy() returned successfully: walks every row
-  * of the expectedPaths[] table from the category's own ordinal upwards and
-  * checks that each getParent() call returns the ordinal the table expects.
-  * Also checks that the first entry of every row hangs directly under the
-  * root, and that the root itself has no valid parent.
-  */
- public static void checkPaths(TaxonomyWriter tw) throws IOException {
-   for (int cat = 0; cat < categories.length; cat++) {
-     int level = expectedPaths[cat].length - 1;
-     int current = expectedPaths[cat][level];
-     while (--level >= 0) {
-       current = tw.getParent(current);
-       int wanted = expectedPaths[cat][level];
-       if (current != wanted) {
-         fail("For category "+showcat(categories[cat])+" expected ancestor level "+
-             (expectedPaths[cat].length-1-level)+" was "+wanted+
-             ", but got "+current);
-       }
-     }
-     // the top-most element of each path must be a direct child of the root
-     assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(expectedPaths[cat][0]));
-   }
-   // and the root has no parent at all
-   assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL));
- }
-
- /**
- * Basic test for TaxonomyWriter.getParent(). This is similar to testWriter
- * above, except we also check the parents of the added categories, not just
- * the categories themselves.
- */
- @Test
- public void testWriterCheckPaths() throws Exception {
- Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
- fillTaxonomyCheckPaths(tw);
- // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
- // is what we expect it to be.
- assertEquals(expectedCategories.length, tw.getSize());
- tw.close();
- indexDir.close();
- }
-
- /**
-  * Path-checking counterpart of testWriterTwice and testWriterTwice2.
-  * The categories are added, then added a second time, and after each
-  * round the paths must be the same old ones. The writer is then closed
-  * and a new one opened on the same directory, where the paths are checked
-  * before and after adding the categories yet again - to see that reading
-  * the existing data back from disk works as well.
-  */
- @Test
- public void testWriterCheckPaths2() throws Exception {
-   Directory dir = newDirectory();
-
-   // first writer session: fill and verify, then re-add and verify again
-   TaxonomyWriter firstSession = new LuceneTaxonomyWriter(dir);
-   for (int pass = 0; pass < 2; pass++) {
-     fillTaxonomy(firstSession);
-     checkPaths(firstSession);
-   }
-   firstSession.close();
-
-   // second writer session on the same directory: the existing data must
-   // already check out, and must still do so after adding everything again
-   TaxonomyWriter secondSession = new LuceneTaxonomyWriter(dir);
-   checkPaths(secondSession);
-   fillTaxonomy(secondSession);
-   checkPaths(secondSession);
-   secondSession.close();
-   dir.close();
- }
-
-// TODO (Facet): test multiple readers, one writer. Have the multiple readers
-// using the same object (simulating threads) or different objects
-// (simulating processes).
-}