updated tests
[librarian.git] / librarian / epub.py
index e992f40..e25ce8f 100644 (file)
@@ -15,7 +15,6 @@ from tempfile import mkdtemp
 from shutil import rmtree
 
 import sys
 from shutil import rmtree
 
 import sys
-sys.path.append('..') # for running from working copy
 
 from librarian import XMLNamespace, RDFNS, DCNS, WLNS, NCXNS, OPFNS, NoDublinCore
 from librarian.dcparser import BookInfo
 
 from librarian import XMLNamespace, RDFNS, DCNS, WLNS, NCXNS, OPFNS, NoDublinCore
 from librarian.dcparser import BookInfo
@@ -259,16 +258,18 @@ def transform_chunk(chunk_xml, chunk_no, annotations):
     replace_by_verse(chunk_xml)
     html_tree = xslt(chunk_xml, res('xsltScheme.xsl'))
     chars = used_chars(html_tree.getroot())
     replace_by_verse(chunk_xml)
     html_tree = xslt(chunk_xml, res('xsltScheme.xsl'))
     chars = used_chars(html_tree.getroot())
-    output_html = etree.tostring(html_tree, pretty_print=True)
+    output_html = etree.tostring(html_tree, method="html", pretty_print=True)
     return output_html, toc, chars
 
 
     return output_html, toc, chars
 
 
-def transform(provider, slug, output_file=None, output_dir=None):
-    """ produces an epub
+def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False):
+    """ produces an EPUB file
 
 
-    provider is a DocProvider
-    either output_file (a file-like object) or output_dir (path to file/dir) should be specified
-    if output_dir is specified, file will be written to <output_dir>/<author>/<slug>.epub
+    provider: a DocProvider
+    slug: slug of the file to process, looked up in provider (mutually exclusive with file_path)
+    output_file: file-like object or path to output file
+    output_dir: path to directory to save output file to; either this or output_file must be present
+    make_dir: writes output to <output_dir>/<author>/<slug>.epub instead of <output_dir>/<slug>.epub
     """
 
     def transform_file(input_xml, chunk_counter=1, first=True):
     """
 
     def transform_file(input_xml, chunk_counter=1, first=True):
@@ -285,13 +286,13 @@ def transform(provider, slug, output_file=None, output_dir=None):
             html_tree = xslt(input_xml, res('xsltTitle.xsl'))
             chars = used_chars(html_tree.getroot())
             zip.writestr('OPS/title.html',
             html_tree = xslt(input_xml, res('xsltTitle.xsl'))
             chars = used_chars(html_tree.getroot())
             zip.writestr('OPS/title.html',
-                 etree.tostring(html_tree, pretty_print=True))
+                 etree.tostring(html_tree, method="html", pretty_print=True))
         elif children:
             # write title page for every parent
             html_tree = xslt(input_xml, res('xsltChunkTitle.xsl'))
             chars = used_chars(html_tree.getroot())
             zip.writestr('OPS/part%d.html' % chunk_counter, 
         elif children:
             # write title page for every parent
             html_tree = xslt(input_xml, res('xsltChunkTitle.xsl'))
             chars = used_chars(html_tree.getroot())
             zip.writestr('OPS/part%d.html' % chunk_counter, 
-                etree.tostring(html_tree, pretty_print=True))
+                etree.tostring(html_tree, method="html", pretty_print=True))
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
@@ -327,7 +328,17 @@ def transform(provider, slug, output_file=None, output_dir=None):
         return toc, chunk_counter, chars
 
     # read metadata from the first file
         return toc, chunk_counter, chars
 
     # read metadata from the first file
-    input_xml = etree.parse(provider[slug])
+    if file_path:
+        if slug:
+            raise ValueError('slug or file_path should be specified, not both')
+        f = open(file_path, 'r')
+        input_xml = etree.parse(f)
+        f.close()
+    else:
+        if not slug:
+            raise ValueError('either slug or file_path should be specified')
+        input_xml = etree.parse(provider[slug])
+
     metadata = input_xml.find('.//'+RDFNS('Description'))
     if metadata is None:
         raise NoDublinCore('Document has no DublinCore - which is required.')
     metadata = input_xml.find('.//'+RDFNS('Description'))
     if metadata is None:
         raise NoDublinCore('Document has no DublinCore - which is required.')
@@ -336,14 +347,17 @@ def transform(provider, slug, output_file=None, output_dir=None):
 
     # if output to dir, create the file
     if output_dir is not None:
 
     # if output to dir, create the file
     if output_dir is not None:
-        author = unicode(book_info.author)
-        author_dir = os.path.join(output_dir, author)
-        try:
-            os.makedirs(author_dir)
-        except OSError:
-            pass
-        output_file = open(os.path.join(author_dir, '%s.epub' % slug), 'w')
-
+        if make_dir:
+            author = unicode(book_info.author)
+            output_dir = os.path.join(output_dir, author)
+            try:
+                os.makedirs(output_dir)
+            except OSError:
+                pass
+        if slug:
+            output_file = open(os.path.join(output_dir, '%s.epub' % slug), 'w')
+        else:
+            output_file = open(os.path.join(output_dir, os.path.splitext(os.path.basename(file_path))[0] + '.epub'), 'w')
 
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
 
     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
@@ -395,15 +409,20 @@ def transform(provider, slug, output_file=None, output_dir=None):
         html_tree = xslt(annotations, res("xsltAnnotations.xsl"))
         chars = chars.union(used_chars(html_tree.getroot()))
         zip.writestr('OPS/annotations.html', etree.tostring(
         html_tree = xslt(annotations, res("xsltAnnotations.xsl"))
         chars = chars.union(used_chars(html_tree.getroot()))
         zip.writestr('OPS/annotations.html', etree.tostring(
-                            html_tree, pretty_print=True))
+                            html_tree, method="html", pretty_print=True))
 
     # strip fonts
     tmpdir = mkdtemp('-librarian-epub')
     cwd = os.getcwd()
 
 
     # strip fonts
     tmpdir = mkdtemp('-librarian-epub')
     cwd = os.getcwd()
 
-    os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../font-optimizer'))
+    os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
     for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
     for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
-        subprocess.check_call(['./subset.pl', '--chars', ''.join(chars).encode('utf-8'), res('../fonts/' + fname), os.path.join(tmpdir, fname)])
+        optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), res('../fonts/' + fname), os.path.join(tmpdir, fname)]
+        if verbose:
+            print "Running font-optimizer"
+            subprocess.check_call(optimizer_call)
+        else:
+            subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
     rmtree(tmpdir)
     os.chdir(cwd)
         zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
     rmtree(tmpdir)
     os.chdir(cwd)
@@ -422,18 +441,3 @@ def transform(provider, slug, output_file=None, output_dir=None):
     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
     zip.close()
     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
     zip.close()
-
-
-if __name__ == '__main__':
-    from librarian import DirDocProvider
-
-    if len(sys.argv) < 2:
-        print >> sys.stderr, 'Usage: python epub.py <input file>'
-        sys.exit(1)
-
-    main_input = sys.argv[1]
-    basepath, ext = os.path.splitext(main_input)
-    path, slug = os.path.realpath(basepath).rsplit('/', 1)
-    provider = DirDocProvider(path)
-    transform(provider, slug, output_dir=path)
-