pylucene 3.5.0-3
[pylucene.git] / lucene-java-3.5.0 / lucene / backwards / src / test / org / apache / lucene / index / TestCompoundFile.java
diff --git a/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/index/TestCompoundFile.java b/lucene-java-3.5.0/lucene/backwards/src/test/org/apache/lucene/index/TestCompoundFile.java
new file mode 100644 (file)
index 0000000..16892a8
--- /dev/null
@@ -0,0 +1,688 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.File;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.MockDirectoryWrapper.Failure;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.store._TestHelper;
+import org.apache.lucene.util._TestUtil;
+
+
+public class TestCompoundFile extends LuceneTestCase
+{
+    /** Directory shared by the tests; assigned in {@link #setUp()} and closed in {@link #tearDown()}. */
+    private Directory dir;
+
+    @Override
+    public void setUp() throws Exception {
+       super.setUp();
+       File file = _TestUtil.getTempDir("testIndex");
+       // use a simple FSDir here, to be sure to have SimpleFSInputs
+       dir = new SimpleFSDirectory(file,null);
+    }
+
+    @Override
+    public void tearDown() throws Exception {
+       dir.close();
+       super.tearDown();
+    }
+
+    /**
+     * Creates a file of the specified size filled with random bytes.
+     *
+     * @param dir  directory in which the file is created
+     * @param name name of the file to create
+     * @param size number of bytes to write; nothing is written when it is not positive
+     * @throws IOException if creating, writing or closing the output fails
+     */
+    private void createRandomFile(Directory dir, String name, int size)
+    throws IOException
+    {
+        IndexOutput os = dir.createOutput(name);
+        try {
+            for (int i=0; i<size; i++) {
+                byte b = (byte) (Math.random() * 256);
+                os.writeByte(b);
+            }
+        } finally {
+            // close even when a write fails so the output is not left open
+            os.close();
+        }
+    }
+
+    /**
+     * Creates a file of the specified size with sequential data. The first
+     * byte written is {@code start}; every following byte is the previous one
+     * plus one. The counter is a {@code byte}, so the sequence wraps around
+     * when it overflows.
+     *
+     * @param dir   directory in which the file is created
+     * @param name  name of the file to create
+     * @param start value of the first byte
+     * @param size  number of bytes to write; nothing is written when it is not positive
+     * @throws IOException if creating, writing or closing the output fails
+     */
+    private void createSequenceFile(Directory dir,
+                                    String name,
+                                    byte start,
+                                    int size)
+    throws IOException
+    {
+        IndexOutput os = dir.createOutput(name);
+        try {
+            for (int i=0; i < size; i++) {
+                os.writeByte(start);
+                start ++;
+            }
+        } finally {
+            // close even when a write fails so the output is not left open
+            os.close();
+        }
+    }
+
+
+    private void assertSameStreams(String msg,
+                                   IndexInput expected,
+                                   IndexInput test)
+    throws IOException
+    {
+        assertNotNull(msg + " null expected", expected);
+        assertNotNull(msg + " null test", test);
+        assertEquals(msg + " length", expected.length(), test.length());
+        assertEquals(msg + " position", expected.getFilePointer(),
+                                        test.getFilePointer());
+
+        byte expectedBuffer[] = new byte[512];
+        byte testBuffer[] = new byte[expectedBuffer.length];
+
+        long remainder = expected.length() - expected.getFilePointer();
+        while(remainder > 0) {
+            int readLen = (int) Math.min(remainder, expectedBuffer.length);
+            expected.readBytes(expectedBuffer, 0, readLen);
+            test.readBytes(testBuffer, 0, readLen);
+            assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer,
+                testBuffer, 0, readLen);
+            remainder -= readLen;
+        }
+    }
+
+
+    /**
+     * Seeks both inputs to {@code seekTo} and asserts that they are identical
+     * from there on. Positions outside {@code [0, expected.length())} are
+     * skipped without any assertion.
+     *
+     * @param msg      prefix for every assertion message; callers are expected
+     *                 to describe the seek position in it
+     * @param expected reference input
+     * @param actual   input being checked
+     * @param seekTo   absolute position to seek both inputs to
+     * @throws IOException if seeking or reading fails
+     */
+    private void assertSameStreams(String msg,
+                                   IndexInput expected,
+                                   IndexInput actual,
+                                   long seekTo)
+    throws IOException
+    {
+        if(seekTo >= 0 && seekTo < expected.length())
+        {
+            expected.seek(seekTo);
+            actual.seek(seekTo);
+            // Pass msg through unchanged: tagging every call with ", seek(mid)"
+            // produced misleading messages such as "..., seek(0), seek(mid)".
+            assertSameStreams(msg, expected, actual);
+        }
+    }
+
+
+
+    private void assertSameSeekBehavior(String msg,
+                                        IndexInput expected,
+                                        IndexInput actual)
+    throws IOException
+    {
+        // seek to 0
+        long point = 0;
+        assertSameStreams(msg + ", seek(0)", expected, actual, point);
+
+        // seek to middle
+        point = expected.length() / 2l;
+        assertSameStreams(msg + ", seek(mid)", expected, actual, point);
+
+        // seek to end - 2
+        point = expected.length() - 2;
+        assertSameStreams(msg + ", seek(end-2)", expected, actual, point);
+
+        // seek to end - 1
+        point = expected.length() - 1;
+        assertSameStreams(msg + ", seek(end-1)", expected, actual, point);
+
+        // seek to the end
+        point = expected.length();
+        assertSameStreams(msg + ", seek(end)", expected, actual, point);
+
+        // seek past end
+        point = expected.length() + 1;
+        assertSameStreams(msg + ", seek(end+1)", expected, actual, point);
+    }
+
+
+    private void assertEqualArrays(String msg,
+                                   byte[] expected,
+                                   byte[] test,
+                                   int start,
+                                   int len)
+    {
+        assertNotNull(msg + " null expected", expected);
+        assertNotNull(msg + " null test", test);
+
+        for (int i=start; i<len; i++) {
+            assertEquals(msg + " " + i, expected[i], test[i]);
+        }
+    }
+
+
+    // ===========================================================
+    //  Tests of the basic CompoundFile functionality
+    // ===========================================================
+
+
+    /** This test creates compound file based on a single file.
+     *  Files of different sizes are tested: 0, 1, 10, 100 bytes.
+     */
+    public void testSingleFile() throws IOException {
+        int data[] = new int[] { 0, 1, 10, 100 };
+        for (int i=0; i<data.length; i++) {
+            String name = "t" + data[i];
+            createSequenceFile(dir, name, (byte) 0, data[i]);
+            CompoundFileWriter csw = new CompoundFileWriter(dir, name + ".cfs");
+            csw.addFile(name);
+            csw.close();
+
+            CompoundFileReader csr = new CompoundFileReader(dir, name + ".cfs");
+            IndexInput expected = dir.openInput(name);
+            IndexInput actual = csr.openInput(name);
+            assertSameStreams(name, expected, actual);
+            assertSameSeekBehavior(name, expected, actual);
+            expected.close();
+            actual.close();
+            csr.close();
+        }
+    }
+
+
+    /** This test creates compound file based on two files.
+     *
+     */
+    public void testTwoFiles() throws IOException {
+        createSequenceFile(dir, "d1", (byte) 0, 15);
+        createSequenceFile(dir, "d2", (byte) 0, 114);
+
+        CompoundFileWriter csw = new CompoundFileWriter(dir, "d.csf");
+        csw.addFile("d1");
+        csw.addFile("d2");
+        csw.close();
+
+        CompoundFileReader csr = new CompoundFileReader(dir, "d.csf");
+        IndexInput expected = dir.openInput("d1");
+        IndexInput actual = csr.openInput("d1");
+        assertSameStreams("d1", expected, actual);
+        assertSameSeekBehavior("d1", expected, actual);
+        expected.close();
+        actual.close();
+
+        expected = dir.openInput("d2");
+        actual = csr.openInput("d2");
+        assertSameStreams("d2", expected, actual);
+        assertSameSeekBehavior("d2", expected, actual);
+        expected.close();
+        actual.close();
+        csr.close();
+    }
+
+    /** This test creates a compound file based on a large number of files of
+     *  various length. The file content is generated randomly. The sizes range
+     *  from 0 to 1Mb. Some of the sizes are selected to test the buffering
+     *  logic in the file reading code. For this the chunk variable is set to
+     *  the length of the buffer used internally by the compound file logic.
+     */
+    public void testRandomFiles() throws IOException {
+        // Setup the test segment
+        String segment = "test";
+        int chunk = 1024; // internal buffer size used by the stream
+        createRandomFile(dir, segment + ".zero", 0);
+        createRandomFile(dir, segment + ".one", 1);
+        createRandomFile(dir, segment + ".ten", 10);
+        createRandomFile(dir, segment + ".hundred", 100);
+        createRandomFile(dir, segment + ".big1", chunk);
+        createRandomFile(dir, segment + ".big2", chunk - 1);
+        createRandomFile(dir, segment + ".big3", chunk + 1);
+        createRandomFile(dir, segment + ".big4", 3 * chunk);
+        createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
+        createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
+        createRandomFile(dir, segment + ".big7", 1000 * chunk);
+
+        // Setup extraneous files
+        createRandomFile(dir, "onetwothree", 100);
+        createRandomFile(dir, segment + ".notIn", 50);
+        createRandomFile(dir, segment + ".notIn2", 51);
+
+        // Now test
+        CompoundFileWriter csw = new CompoundFileWriter(dir, "test.cfs");
+        final String data[] = new String[] {
+            ".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3",
+            ".big4", ".big5", ".big6", ".big7"
+        };
+        for (int i=0; i<data.length; i++) {
+            csw.addFile(segment + data[i]);
+        }
+        csw.close();
+
+        CompoundFileReader csr = new CompoundFileReader(dir, "test.cfs");
+        for (int i=0; i<data.length; i++) {
+            IndexInput check = dir.openInput(segment + data[i]);
+            IndexInput test = csr.openInput(segment + data[i]);
+            assertSameStreams(data[i], check, test);
+            assertSameSeekBehavior(data[i], check, test);
+            test.close();
+            check.close();
+        }
+        csr.close();
+    }
+
+
+    /** Setup a larger compound file with a number of components, each of
+     *  which is a sequential file (so that we can easily tell that we are
+     *  reading in the right byte). The methods sets up 20 files - f0 to f19,
+     *  the size of each file is 1000 bytes.
+     */
+    private void setUp_2() throws IOException {
+        CompoundFileWriter cw = new CompoundFileWriter(dir, "f.comp");
+        for (int i=0; i<20; i++) {
+            createSequenceFile(dir, "f" + i, (byte) 0, 2000);
+            cw.addFile("f" + i);
+        }
+        cw.close();
+    }
+
+
+    public void testReadAfterClose() throws IOException {
+        demo_FSIndexInputBug(dir, "test");
+    }
+
+    /**
+     * Writes 2000 bytes to {@code file}, opens it, reads one byte, closes the
+     * input and then keeps using the closed input: one more {@code readByte()},
+     * a {@code seek(1099)}, and a final {@code readByte()}. Only the final
+     * read is required to throw an {@link IOException}; the test fails if it
+     * does not.
+     *
+     * @param fsdir directory in which the test file is created and opened
+     * @param file  name of the test file
+     * @throws IOException if any of the calls that are expected to succeed fails
+     */
+    private void demo_FSIndexInputBug(Directory fsdir, String file)
+    throws IOException
+    {
+        // Setup the test file - we need more than 1024 bytes
+        IndexOutput os = fsdir.createOutput(file);
+        for(int i=0; i<2000; i++) {
+            os.writeByte((byte) i);
+        }
+        os.close();
+
+        IndexInput in = fsdir.openInput(file);
+
+        // This read primes the buffer in IndexInput
+        in.readByte();
+
+        // Close the file
+        in.close();
+
+        // ERROR: this call should fail, but succeeds because the buffer
+        // is still filled
+        in.readByte();
+
+        // ERROR: this call should fail, but succeeds for some reason as well
+        in.seek(1099);
+
+        try {
+            // OK: this call correctly fails. We are now past the 1024 internal
+            // buffer, so an actual IO is attempted, which fails
+            in.readByte();
+            fail("expected readByte() to throw exception");
+        } catch (IOException e) {
+          // expected exception
+        }
+    }
+
+
+    static boolean isCSIndexInput(IndexInput is) {
+        return is instanceof CompoundFileReader.CSIndexInput;
+    }
+
+    static boolean isCSIndexInputOpen(IndexInput is) throws IOException {
+        if (isCSIndexInput(is)) {
+            CompoundFileReader.CSIndexInput cis =
+            (CompoundFileReader.CSIndexInput) is;
+
+            return _TestHelper.isSimpleFSIndexInputOpen(cis.base);
+        } else {
+            return false;
+        }
+    }
+
+
+    /**
+     * Checks the open/closed state of a compound-file input and its clone
+     * while they and the compound reader are closed one after another:
+     * closing input {@code one} must leave it reported as open, and only
+     * closing the compound reader must make both {@code one} and its clone
+     * {@code two} report as closed. The content of both is compared with the
+     * plain file "f11" along the way.
+     */
+    public void testClonedStreamsClosing() throws IOException {
+        setUp_2();
+        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+
+        // basic clone
+        IndexInput expected = dir.openInput("f11");
+
+        // this test only works for FSIndexInput
+        assertTrue(_TestHelper.isSimpleFSIndexInput(expected));
+        assertTrue(_TestHelper.isSimpleFSIndexInputOpen(expected));
+
+        IndexInput one = cr.openInput("f11");
+        assertTrue(isCSIndexInputOpen(one));
+
+        IndexInput two = (IndexInput) one.clone();
+        assertTrue(isCSIndexInputOpen(two));
+
+        assertSameStreams("basic clone one", expected, one);
+        // rewind the reference; the clone "two" has not been read yet
+        expected.seek(0);
+        assertSameStreams("basic clone two", expected, two);
+
+        // Now close the first stream
+        one.close();
+        assertTrue("Only close when cr is closed", isCSIndexInputOpen(one));
+
+        // The following should really fail since we couldn't expect to
+        // access a file once close has been called on it (regardless of
+        // buffering and/or clone magic)
+        expected.seek(0);
+        two.seek(0);
+        assertSameStreams("basic clone two/2", expected, two);
+
+
+        // Now close the compound reader
+        cr.close();
+        assertFalse("Now closed one", isCSIndexInputOpen(one));
+        assertFalse("Now closed two", isCSIndexInputOpen(two));
+
+        // The following may also fail since the compound stream is closed
+        expected.seek(0);
+        two.seek(0);
+        //assertSameStreams("basic clone two/3", expected, two);
+
+
+        // Now close the second clone
+        two.close();
+        expected.seek(0);
+        two.seek(0);
+        //assertSameStreams("basic clone two/4", expected, two);
+
+        expected.close();
+    }
+
+
+    /** This test opens two files from a compound stream and verifies that
+     *  their file positions are independent of each other.
+     */
+    public void testRandomAccess() throws IOException {
+        setUp_2();
+        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+
+        // Open two files
+        IndexInput e1 = dir.openInput("f11");
+        IndexInput e2 = dir.openInput("f3");
+
+        IndexInput a1 = cr.openInput("f11");
+        IndexInput a2 = dir.openInput("f3");
+
+        // Seek the first pair
+        e1.seek(100);
+        a1.seek(100);
+        assertEquals(100, e1.getFilePointer());
+        assertEquals(100, a1.getFilePointer());
+        byte be1 = e1.readByte();
+        byte ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        // Now seek the second pair
+        e2.seek(1027);
+        a2.seek(1027);
+        assertEquals(1027, e2.getFilePointer());
+        assertEquals(1027, a2.getFilePointer());
+        byte be2 = e2.readByte();
+        byte ba2 = a2.readByte();
+        assertEquals(be2, ba2);
+
+        // Now make sure the first one didn't move
+        assertEquals(101, e1.getFilePointer());
+        assertEquals(101, a1.getFilePointer());
+        be1 = e1.readByte();
+        ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        // Now more the first one again, past the buffer length
+        e1.seek(1910);
+        a1.seek(1910);
+        assertEquals(1910, e1.getFilePointer());
+        assertEquals(1910, a1.getFilePointer());
+        be1 = e1.readByte();
+        ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        // Now make sure the second set didn't move
+        assertEquals(1028, e2.getFilePointer());
+        assertEquals(1028, a2.getFilePointer());
+        be2 = e2.readByte();
+        ba2 = a2.readByte();
+        assertEquals(be2, ba2);
+
+        // Move the second set back, again cross the buffer size
+        e2.seek(17);
+        a2.seek(17);
+        assertEquals(17, e2.getFilePointer());
+        assertEquals(17, a2.getFilePointer());
+        be2 = e2.readByte();
+        ba2 = a2.readByte();
+        assertEquals(be2, ba2);
+
+        // Finally, make sure the first set didn't move
+        // Now make sure the first one didn't move
+        assertEquals(1911, e1.getFilePointer());
+        assertEquals(1911, a1.getFilePointer());
+        be1 = e1.readByte();
+        ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        e1.close();
+        e2.close();
+        a1.close();
+        a2.close();
+        cr.close();
+    }
+
+    /** This test opens two files from a compound stream, clones each of the
+     *  two inputs, and verifies that the file positions of the originals
+     *  (e1, e2) and of their clones (a1, a2) are independent of each other.
+     */
+    public void testRandomAccessClones() throws IOException {
+        setUp_2();
+        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+
+        // Open two files
+        IndexInput e1 = cr.openInput("f11");
+        IndexInput e2 = cr.openInput("f3");
+
+        // Clone both inputs; each clone is driven in lock step with its original
+        IndexInput a1 = (IndexInput) e1.clone();
+        IndexInput a2 = (IndexInput) e2.clone();
+
+        // Seek the first pair
+        e1.seek(100);
+        a1.seek(100);
+        assertEquals(100, e1.getFilePointer());
+        assertEquals(100, a1.getFilePointer());
+        byte be1 = e1.readByte();
+        byte ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        // Now seek the second pair
+        e2.seek(1027);
+        a2.seek(1027);
+        assertEquals(1027, e2.getFilePointer());
+        assertEquals(1027, a2.getFilePointer());
+        byte be2 = e2.readByte();
+        byte ba2 = a2.readByte();
+        assertEquals(be2, ba2);
+
+        // Now make sure the first one didn't move
+        assertEquals(101, e1.getFilePointer());
+        assertEquals(101, a1.getFilePointer());
+        be1 = e1.readByte();
+        ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        // Now move the first one again, past the buffer length
+        e1.seek(1910);
+        a1.seek(1910);
+        assertEquals(1910, e1.getFilePointer());
+        assertEquals(1910, a1.getFilePointer());
+        be1 = e1.readByte();
+        ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        // Now make sure the second set didn't move
+        assertEquals(1028, e2.getFilePointer());
+        assertEquals(1028, a2.getFilePointer());
+        be2 = e2.readByte();
+        ba2 = a2.readByte();
+        assertEquals(be2, ba2);
+
+        // Move the second set back, again cross the buffer size
+        e2.seek(17);
+        a2.seek(17);
+        assertEquals(17, e2.getFilePointer());
+        assertEquals(17, a2.getFilePointer());
+        be2 = e2.readByte();
+        ba2 = a2.readByte();
+        assertEquals(be2, ba2);
+
+        // Finally, make sure the first set didn't move
+        assertEquals(1911, e1.getFilePointer());
+        assertEquals(1911, a1.getFilePointer());
+        be1 = e1.readByte();
+        ba1 = a1.readByte();
+        assertEquals(be1, ba1);
+
+        e1.close();
+        e2.close();
+        a1.close();
+        a2.close();
+        cr.close();
+    }
+
+
+    public void testFileNotFound() throws IOException {
+        setUp_2();
+        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+
+        // Open two files
+        try {
+            cr.openInput("bogus");
+            fail("File not found");
+
+        } catch (IOException e) {
+            /* success */
+            //System.out.println("SUCCESS: File Not Found: " + e);
+        }
+
+        cr.close();
+    }
+
+
+    public void testReadPastEOF() throws IOException {
+        setUp_2();
+        CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+        IndexInput is = cr.openInput("f2");
+        is.seek(is.length() - 10);
+        byte b[] = new byte[100];
+        is.readBytes(b, 0, 10);
+
+        try {
+            is.readByte();
+            fail("Single byte read past end of file");
+        } catch (IOException e) {
+            /* success */
+            //System.out.println("SUCCESS: single byte read past end of file: " + e);
+        }
+
+        is.seek(is.length() - 10);
+        try {
+            is.readBytes(b, 0, 50);
+            fail("Block read past end of file");
+        } catch (IOException e) {
+            /* success */
+            //System.out.println("SUCCESS: block read past end of file: " + e);
+        }
+
+        is.close();
+        cr.close();
+    }
+
+    /** This test that writes larger than the size of the buffer output
+     * will correctly increment the file pointer.
+     */
+    public void testLargeWrites() throws IOException {
+        IndexOutput os = dir.createOutput("testBufferStart.txt");
+
+        byte[] largeBuf = new byte[2048];
+        for (int i=0; i<largeBuf.length; i++) {
+            largeBuf[i] = (byte) (Math.random() * 256);
+        }
+
+        long currentPos = os.getFilePointer();
+        os.writeBytes(largeBuf, largeBuf.length);
+
+        try {
+            assertEquals(currentPos + largeBuf.length, os.getFilePointer());
+        } finally {
+            os.close();
+        }
+
+    }
+    
+   public void testAddExternalFile() throws IOException {
+       createSequenceFile(dir, "d1", (byte) 0, 15);
+
+       Directory newDir = newDirectory();
+       CompoundFileWriter csw = new CompoundFileWriter(newDir, "d.csf");
+       csw.addFile("d1", dir);
+       csw.close();
+
+       CompoundFileReader csr = new CompoundFileReader(newDir, "d.csf");
+       IndexInput expected = dir.openInput("d1");
+       IndexInput actual = csr.openInput("d1");
+       assertSameStreams("d1", expected, actual);
+       assertSameSeekBehavior("d1", expected, actual);
+       expected.close();
+       actual.close();
+       csr.close();
+       
+       newDir.close();
+   }
+
+  // Make sure we don't somehow use more than 1 descriptor
+  // when reading a CFS with many subs:
+  public void testManySubFiles() throws IOException {
+
+    final Directory d = newFSDirectory(_TestUtil.getTempDir("CFSManySubFiles"));
+    final int FILE_COUNT = 10000;
+
+    for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
+      IndexOutput out = d.createOutput("file." + fileIdx);
+      out.writeByte((byte) fileIdx);
+      out.close();
+    }
+    
+    final CompoundFileWriter cfw = new CompoundFileWriter(d, "c.cfs");
+    for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
+      cfw.addFile("file." + fileIdx);
+    }
+    cfw.close();
+
+    final IndexInput[] ins = new IndexInput[FILE_COUNT];
+    final CompoundFileReader cfr = new CompoundFileReader(d, "c.cfs");
+    for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
+      ins[fileIdx] = cfr.openInput("file." + fileIdx);
+    }
+
+    for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
+      assertEquals((byte) fileIdx, ins[fileIdx].readByte());
+    }
+
+    for(int fileIdx=0;fileIdx<FILE_COUNT;fileIdx++) {
+      ins[fileIdx].close();
+    }
+    cfr.close();
+    d.close();
+  }
+}