fix for newlines in epub
[librarian.git] / librarian / epub.py
index 3e9056c..5f017d4 100644 (file)
@@ -19,7 +19,7 @@ from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
 
 from librarian import functions, get_resource
 
@@ -29,6 +29,10 @@ functions.reg_person_name()
 functions.reg_lang_code_3to2()
 
 
+def squeeze_whitespace(s):
+    return re.sub(r'\s+', ' ', s)
+
+
 def set_hyph_language(source_tree):
     def get_short_lng_code(text):
         result = ''
@@ -203,6 +207,8 @@ class Stanza(object):
         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
             if i:
                 self.open_normal_verse()
+            if not verse_text.strip():
+                continue
             verse = self.get_open_verse()
             if len(verse):
                 verse[-1].tail = (verse[-1].tail or "") + verse_text
@@ -435,15 +441,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
             # write book title page
             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
             chars = used_chars(html_tree.getroot())
-            zip.writestr(
-                'OPS/title.html',
-                etree.tostring(
-                    html_tree, pretty_print=True, xml_declaration=True,
-                    encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-                )
+            html_string = etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
+                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
             )
+            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
             # add a title page TOC entry
             toc.add(u"Strona tytułowa", "title.html")
         elif wldoc.book_info.parts:
@@ -460,7 +464,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                 )
-            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
+            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
@@ -486,7 +490,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
+                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
                 add_to_manifest(manifest, chunk_counter)
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
@@ -563,7 +567,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     if cover:
         if cover is True:
-            cover = DefaultEbookCover
+            cover = make_cover
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
@@ -643,7 +647,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         '<itemref idref="support" />'))
     html_string = open(get_resource('epub/support.html')).read()
     chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', html_string)
+    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
 
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
@@ -652,12 +656,12 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         '<itemref idref="last" />'))
     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', etree.tostring(
+    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
         encoding="utf-8",
         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-    ))
+    )))
 
     if not flags or 'without-fonts' not in flags:
         # strip fonts
@@ -673,12 +677,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
                               os.path.join(tmpdir, fname)]
+            env = {"PERL_USE_UNSAFE_INC": "1"}
             if verbose:
                 print "Running font-optimizer"
-                subprocess.check_call(optimizer_call)
+                subprocess.check_call(optimizer_call, env=env)
             else:
                 dev_null = open(os.devnull, 'w')
-                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
+                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))