fix for newlines in epub
[librarian.git] / librarian / epub.py
index 4677229..5f017d4 100644 (file)
@@ -19,7 +19,7 @@ from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
 
 from librarian import functions, get_resource
 
 
 from librarian import functions, get_resource
 
@@ -29,6 +29,10 @@ functions.reg_person_name()
 functions.reg_lang_code_3to2()
 
 
 functions.reg_lang_code_3to2()
 
 
+def squeeze_whitespace(s):
+    return re.sub(r'\s+', ' ', s)
+
+
 def set_hyph_language(source_tree):
     def get_short_lng_code(text):
         result = ''
 def set_hyph_language(source_tree):
     def get_short_lng_code(text):
         result = ''
@@ -113,11 +117,13 @@ def node_name(node):
     return tempnode.text
 
 
     return tempnode.text
 
 
-def xslt(xml, sheet):
+def xslt(xml, sheet, **kwargs):
     if isinstance(xml, etree._Element):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
     if isinstance(xml, etree._Element):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
-        return xml.xslt(etree.parse(xsltf))
+        transform = etree.XSLT(etree.parse(xsltf))
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+        return transform(xml, **params)
 
 
 def replace_characters(node):
 
 
 def replace_characters(node):
@@ -201,6 +207,8 @@ class Stanza(object):
         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
             if i:
                 self.open_normal_verse()
         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
             if i:
                 self.open_normal_verse()
+            if not verse_text.strip():
+                continue
             verse = self.get_open_verse()
             if len(verse):
                 verse[-1].tail = (verse[-1].tail or "") + verse_text
             verse = self.get_open_verse()
             if len(verse):
                 verse[-1].tail = (verse[-1].tail or "") + verse_text
@@ -409,7 +417,7 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
 
 
 def transform(wldoc, verbose=False, style=None, html_toc=False,
 
 
 def transform(wldoc, verbose=False, style=None, html_toc=False,
-              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path=''):
+              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
@@ -431,17 +439,15 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         chars = set()
         if first:
             # write book title page
         chars = set()
         if first:
             # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
             chars = used_chars(html_tree.getroot())
             chars = used_chars(html_tree.getroot())
-            zip.writestr(
-                'OPS/title.html',
-                etree.tostring(
-                    html_tree, pretty_print=True, xml_declaration=True,
-                    encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-                )
+            html_string = etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
+                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
             )
             )
+            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
             # add a title page TOC entry
             toc.add(u"Strona tytułowa", "title.html")
         elif wldoc.book_info.parts:
             # add a title page TOC entry
             toc.add(u"Strona tytułowa", "title.html")
         elif wldoc.book_info.parts:
@@ -458,7 +464,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                 )
                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                 )
-            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
+            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
@@ -484,7 +490,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
 
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
+                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
                 add_to_manifest(manifest, chunk_counter)
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
                 add_to_manifest(manifest, chunk_counter)
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
@@ -528,12 +534,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     functions.reg_mathml_epub(zip)
 
 
     functions.reg_mathml_epub(zip)
 
-    for i, filename in enumerate(os.listdir(ilustr_path)):
-        file_path = os.path.join(ilustr_path, filename)
-        zip.write(file_path, os.path.join('OPS', filename))
-        image_id = 'image%s' % i
-        manifest.append(etree.fromstring(
-            '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+    if os.path.isdir(ilustr_path):
+        for i, filename in enumerate(os.listdir(ilustr_path)):
+            file_path = os.path.join(ilustr_path, filename)
+            zip.write(file_path, os.path.join('OPS', filename))
+            image_id = 'image%s' % i
+            manifest.append(etree.fromstring(
+                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
 
     # write static elements
     mime = zipfile.ZipInfo()
 
     # write static elements
     mime = zipfile.ZipInfo()
@@ -560,7 +567,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     if cover:
         if cover is True:
 
     if cover:
         if cover is True:
-            cover = DefaultEbookCover
+            cover = make_cover
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
@@ -640,21 +647,21 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         '<itemref idref="support" />'))
     html_string = open(get_resource('epub/support.html')).read()
     chars.update(used_chars(etree.fromstring(html_string)))
         '<itemref idref="support" />'))
     html_string = open(get_resource('epub/support.html')).read()
     chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', html_string)
+    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
 
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
 
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
     chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', etree.tostring(
+    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
         encoding="utf-8",
         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
         html_tree, pretty_print=True, xml_declaration=True,
         encoding="utf-8",
         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-    ))
+    )))
 
     if not flags or 'without-fonts' not in flags:
         # strip fonts
 
     if not flags or 'without-fonts' not in flags:
         # strip fonts
@@ -670,11 +677,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
                               os.path.join(tmpdir, fname)]
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
                               os.path.join(tmpdir, fname)]
+            env = {"PERL_USE_UNSAFE_INC": "1"}
             if verbose:
                 print "Running font-optimizer"
             if verbose:
                 print "Running font-optimizer"
-                subprocess.check_call(optimizer_call)
+                subprocess.check_call(optimizer_call, env=env)
             else:
             else:
-                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                dev_null = open(os.devnull, 'w')
+                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))