fix for newlines in epub

[librarian.git] / librarian / epub.py
diff --git a/librarian/epub.py b/librarian/epub.py
index 4677229..5f017d4 100644
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -19,7 +19,7 @@ from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
 
 from librarian import functions, get_resource
 
@@ -29,6 +29,10 @@ functions.reg_person_name()
 functions.reg_lang_code_3to2()
 
 
+def squeeze_whitespace(s):
+    return re.sub(r'\s+', ' ', s)
+
+
 def set_hyph_language(source_tree):
     def get_short_lng_code(text):
         result = ''
@@ -113,11 +117,13 @@ def node_name(node):
     return tempnode.text
 
 
-def xslt(xml, sheet):
+def xslt(xml, sheet, **kwargs):
     if isinstance(xml, etree._Element):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
-        return xml.xslt(etree.parse(xsltf))
+        transform = etree.XSLT(etree.parse(xsltf))
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
+        return transform(xml, **params)
 
 
 def replace_characters(node):
@@ -201,6 +207,8 @@ class Stanza(object):
         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
             if i:
                 self.open_normal_verse()
+            if not verse_text.strip():
+                continue
             verse = self.get_open_verse()
             if len(verse):
                 verse[-1].tail = (verse[-1].tail or "") + verse_text
@@ -409,7 +417,7 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
 
 
 def transform(wldoc, verbose=False, style=None, html_toc=False,
-              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path=''):
+              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
     """ produces a EPUB file
 
     sample=n: generate sample e-book (with at least n paragraphs)
@@ -431,17 +439,15 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         chars = set()
         if first:
             # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
             chars = used_chars(html_tree.getroot())
-            zip.writestr(
-                'OPS/title.html',
-                etree.tostring(
-                    html_tree, pretty_print=True, xml_declaration=True,
-                    encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-                )
+            html_string = etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
+                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
             )
+            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
             # add a title page TOC entry
             toc.add(u"Strona tytułowa", "title.html")
         elif wldoc.book_info.parts:
@@ -458,7 +464,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                 )
-            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
+            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
             add_to_manifest(manifest, chunk_counter)
             add_to_spine(spine, chunk_counter)
             chunk_counter += 1
@@ -484,7 +490,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
                 toc.extend(chunk_toc)
                 chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
+                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
                 add_to_manifest(manifest, chunk_counter)
                 add_to_spine(spine, chunk_counter)
                 chunk_counter += 1
@@ -528,12 +534,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     functions.reg_mathml_epub(zip)
 
-    for i, filename in enumerate(os.listdir(ilustr_path)):
-        file_path = os.path.join(ilustr_path, filename)
-        zip.write(file_path, os.path.join('OPS', filename))
-        image_id = 'image%s' % i
-        manifest.append(etree.fromstring(
-            '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+    if os.path.isdir(ilustr_path):
+        for i, filename in enumerate(os.listdir(ilustr_path)):
+            file_path = os.path.join(ilustr_path, filename)
+            zip.write(file_path, os.path.join('OPS', filename))
+            image_id = 'image%s' % i
+            manifest.append(etree.fromstring(
+                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
 
     # write static elements
     mime = zipfile.ZipInfo()
@@ -560,7 +567,7 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
     if cover:
         if cover is True:
-            cover = DefaultEbookCover
+            cover = make_cover
 
         cover_file = StringIO()
         bound_cover = cover(document.book_info)
@@ -640,21 +647,21 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         '<itemref idref="support" />'))
     html_string = open(get_resource('epub/support.html')).read()
     chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', html_string)
+    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
 
     toc.add("Strona redakcyjna", "last.html")
     manifest.append(etree.fromstring(
         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
     spine.append(etree.fromstring(
         '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', etree.tostring(
+    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
         html_tree, pretty_print=True, xml_declaration=True,
         encoding="utf-8",
         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-    ))
+    )))
 
     if not flags or 'without-fonts' not in flags:
         # strip fonts
@@ -670,11 +677,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
                               os.path.join(tmpdir, fname)]
+            env = {"PERL_USE_UNSAFE_INC": "1"}
             if verbose:
                 print "Running font-optimizer"
-                subprocess.check_call(optimizer_call)
+                subprocess.check_call(optimizer_call, env=env)
             else:
-                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                dev_null = open(os.devnull, 'w')
+                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
             manifest.append(etree.fromstring(
                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))