fix for newlines in epub

[librarian.git] / librarian / epub.py
diff --git a/librarian/epub.py b/librarian/epub.py
index e52e524..5f017d4 100644
--- a/librarian/epub.py
+++ b/librarian/epub.py
@@ -11,13 +11,15 @@ import re
  import subprocess
  from StringIO import StringIO
  from copy import deepcopy
  import subprocess
  from StringIO import StringIO
  from copy import deepcopy
+from mimetypes import guess_type
+
  from lxml import etree
  import zipfile
  from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
  from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  from lxml import etree
  import zipfile
  from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
  from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
-from librarian.cover import DefaultEbookCover
+from librarian.cover import make_cover
  
  from librarian import functions, get_resource
  
  
  from librarian import functions, get_resource
  
@@ -26,6 +28,11 @@ from librarian.hyphenator import Hyphenator
  functions.reg_person_name()
  functions.reg_lang_code_3to2()
  
  functions.reg_person_name()
  functions.reg_lang_code_3to2()
  
+
+def squeeze_whitespace(s):  # whitespace normaliser applied to markup strings before they are written out
+    return re.sub(r'\s+', ' ', s)  # every whitespace run (spaces, tabs, newlines) collapses to one space
+
+
  def set_hyph_language(source_tree):
      def get_short_lng_code(text):
          result = ''
  def set_hyph_language(source_tree):
      def get_short_lng_code(text):
          result = ''
@@ -34,33 +41,39 @@ def set_hyph_language(source_tree):
              for line in f:
                  list = line.strip().split('|')
                  if list[0] == text:
              for line in f:
                  list = line.strip().split('|')
                  if list[0] == text:
-                    result=list[2]
+                    result = list[2]
          if result == '':
              return text
          else:
              return result
          if result == '':
              return text
          else:
              return result
-    bibl_lng = etree.XPath('//dc:language//text()', namespaces = {'dc':str(DCNS)})(source_tree)
-    short_lng = get_short_lng_code(bibl_lng[0])   
+    bibl_lng = etree.XPath('//dc:language//text()',
+                           namespaces={'dc': str(DCNS)})(source_tree)
+    short_lng = get_short_lng_code(bibl_lng[0])
      try:
      try:
-        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' + short_lng + '.dic'))
+        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
+                                       short_lng + '.dic'))
      except:
          pass
      except:
          pass
-    
+
+
  def hyphenate_and_fix_conjunctions(source_tree, hyph):
  def hyphenate_and_fix_conjunctions(source_tree, hyph):
-    if hyph is not None:
-        texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
-        for t in texts:
-            parent = t.getparent()
+    texts = etree.XPath('/utwor/*[2]//text()')(source_tree)  # all text nodes below the second child of <utwor>
+    for t in texts:
+        parent = t.getparent()  # element whose .text or .tail holds this string
+        if hyph is not None:  # hyphenate only when a hyphenator object was passed in
              newt = ''
              wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
              for w in wlist:
              newt = ''
              wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
              for w in wlist:
-                newt += hyph.inserted(w, u'\u00AD')       
-            newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
-            if t.is_text:
-                parent.text = newt
-            elif t.is_tail:
-                parent.tail = newt
-        
+                newt += hyph.inserted(w, u'\u00AD')  # rebuild token by token; U+00AD (soft hyphen) is handed to hyph.inserted()
+        else:
+            newt = t  # no hyphenator: keep the text unchanged
+        newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)  # whitespace after a one-character word -> U+00A0; applied whether or not hyph is set
+        if t.is_text:  # write the result back to the slot the string came from
+            parent.text = newt
+        elif t.is_tail:
+            parent.tail = newt
+
+
  def inner_xml(node):
      """ returns node's text and children as a string
  
  def inner_xml(node):
      """ returns node's text and children as a string
  
@@ -71,6 +84,7 @@ def inner_xml(node):
      nt = node.text if node.text is not None else ''
      return ''.join([nt] + [etree.tostring(child) for child in node])
  
      nt = node.text if node.text is not None else ''
      return ''.join([nt] + [etree.tostring(child) for child in node])
  
+
  def set_inner_xml(node, text):
      """ sets node's text and children from a string
  
  def set_inner_xml(node, text):
      """ sets node's text and children from a string
  
@@ -103,11 +117,13 @@ def node_name(node):
      return tempnode.text
  
  
      return tempnode.text
  
  
-def xslt(xml, sheet):
+def xslt(xml, sheet, **kwargs):  # extra keyword arguments are passed to the stylesheet as parameters
      if isinstance(xml, etree._Element):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
      if isinstance(xml, etree._Element):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
-        return xml.xslt(etree.parse(xsltf))
+        transform = etree.XSLT(etree.parse(xsltf))  # compile the stylesheet parsed from the open file
+        params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())  # strparam() marks each value as an XSLT string parameter
+        return transform(xml, **params)  # apply the stylesheet to xml with those parameters
  
  
  def replace_characters(node):
  
  
  def replace_characters(node):
@@ -134,7 +150,7 @@ def find_annotations(annotations, source, part_no):
      for child in source:
          if child.tag in ('pe', 'pa', 'pt', 'pr'):
              annotation = deepcopy(child)
      for child in source:
          if child.tag in ('pe', 'pa', 'pt', 'pr'):
              annotation = deepcopy(child)
-            number = str(len(annotations)+1)
+            number = str(len(annotations) + 1)
              annotation.set('number', number)
              annotation.set('part', str(part_no))
              annotation.tail = ''
              annotation.set('number', number)
              annotation.set('part', str(part_no))
              annotation.tail = ''
@@ -159,7 +175,7 @@ class Stanza(object):
      >>> print etree.tostring(s)
      <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
      y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
      >>> print etree.tostring(s)
      <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
      y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
-    
+
      """
      def __init__(self, stanza_elem):
          self.stanza = stanza_elem
      """
      def __init__(self, stanza_elem):
          self.stanza = stanza_elem
@@ -191,6 +207,8 @@ class Stanza(object):
          for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
              if i:
                  self.open_normal_verse()
          for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
              if i:
                  self.open_normal_verse()
+            if not verse_text.strip():
+                continue
              verse = self.get_open_verse()
              if len(verse):
                  verse[-1].tail = (verse[-1].tail or "") + verse_text
              verse = self.get_open_verse()
              if len(verse):
                  verse[-1].tail = (verse[-1].tail or "") + verse_text
@@ -221,18 +239,17 @@ def add_to_manifest(manifest, partno):
      """ Adds a node to the manifest section in content.opf file """
  
      partstr = 'part%d' % partno
      """ Adds a node to the manifest section in content.opf file """
  
      partstr = 'part%d' % partno
-    e = manifest.makeelement(OPFNS('item'), attrib={
-                                 'id': partstr,
-                                 'href': partstr + '.html',
-                                 'media-type': 'application/xhtml+xml',
-                             })
+    e = manifest.makeelement(
+        OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
+                               'media-type': 'application/xhtml+xml'}
+    )
      manifest.append(e)
  
  
  def add_to_spine(spine, partno):
      """ Adds a node to the spine section in content.opf file """
  
      manifest.append(e)
  
  
  def add_to_spine(spine, partno):
      """ Adds a node to the spine section in content.opf file """
  
-    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno});
+    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})  # itemref element whose idref is "part<partno>"
      spine.append(e)
  
  
      spine.append(e)
  
  
@@ -246,7 +263,7 @@ class TOC(object):
      def add(self, name, part_href, level=0, is_part=True, index=None):
          assert level == 0 or index is None
          if level > 0 and self.children:
      def add(self, name, part_href, level=0, is_part=True, index=None):
          assert level == 0 or index is None
          if level > 0 and self.children:
-            return self.children[-1].add(name, part_href, level-1, is_part)
+            return self.children[-1].add(name, part_href, level - 1, is_part)
          else:
              t = TOC(name)
              t.part_href = part_href
          else:
              t = TOC(name)
              t.part_href = part_href
@@ -304,7 +321,7 @@ class TOC(object):
              texts.append(
                  "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                  (depth, child.href(), child.name))
              texts.append(
                  "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                  (depth, child.href(), child.name))
-            texts.append(child.html_part(depth+1))
+            texts.append(child.html_part(depth + 1))
          return "\n".join(texts)
  
      def html(self):
          return "\n".join(texts)
  
      def html(self):
@@ -327,10 +344,10 @@ def chop(main_text):
      # prepare a container for each chunk
      part_xml = etree.Element('utwor')
      etree.SubElement(part_xml, 'master')
      # prepare a container for each chunk
      part_xml = etree.Element('utwor')
      etree.SubElement(part_xml, 'master')
-    main_xml_part = part_xml[0] # master
+    main_xml_part = part_xml[0]  # master
  
      last_node_part = False
  
      last_node_part = False
-    
+
      # the below loop are workaround for a problem with epubs in drama ebooks without acts
      is_scene = False
      is_act = False
      # the below loop are workaround for a problem with epubs in drama ebooks without acts
      is_scene = False
      is_act = False
@@ -340,7 +357,7 @@ def chop(main_text):
              is_scene = True
          elif name == 'naglowek_akt':
              is_act = True
              is_scene = True
          elif name == 'naglowek_akt':
              is_act = True
-    
+
      for one_part in main_text:
          name = one_part.tag
          if is_act is False and is_scene is True:
      for one_part in main_text:
          name = one_part.tag
          if is_act is False and is_scene is True:
@@ -364,7 +381,7 @@ def chop(main_text):
                  main_xml_part[:] = [deepcopy(one_part)]
              else:
                  main_xml_part.append(deepcopy(one_part))
                  main_xml_part[:] = [deepcopy(one_part)]
              else:
                  main_xml_part.append(deepcopy(one_part))
-                last_node_part = False            
+                last_node_part = False
      yield part_xml
  
  
      yield part_xml
  
  
@@ -390,17 +407,17 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
          replace_by_verse(chunk_xml)
          html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
          chars = used_chars(html_tree.getroot())
          replace_by_verse(chunk_xml)
          html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
          chars = used_chars(html_tree.getroot())
-        output_html = etree.tostring(html_tree, pretty_print = True,
-                    xml_declaration = True,
-                    encoding = "utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">')
+        output_html = etree.tostring(
+            html_tree, pretty_print=True, xml_declaration=True,
+            encoding="utf-8",
+            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
+                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+        )
      return output_html, toc, chars
  
  
      return output_html, toc, chars
  
  
-def transform(wldoc, verbose=False,
-              style=None, html_toc=False,
-              sample=None, cover=None, flags=None):
+def transform(wldoc, verbose=False, style=None, html_toc=False,
+              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
      """ produces a EPUB file
  
      sample=n: generate sample e-book (with at least n paragraphs)
      """ produces a EPUB file
  
      sample=n: generate sample e-book (with at least n paragraphs)
@@ -412,25 +429,25 @@ def transform(wldoc, verbose=False,
          """ processes one input file and proceeds to its children """
  
          replace_characters(wldoc.edoc.getroot())
          """ processes one input file and proceeds to its children """
  
          replace_characters(wldoc.edoc.getroot())
-        
-        hyphenator = set_hyph_language(wldoc.edoc.getroot())
+
+        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
          hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
          hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
-        
-        
+
          # every input file will have a TOC entry,
          # pointing to starting chunk
          toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
          chars = set()
          if first:
              # write book title page
          # every input file will have a TOC entry,
          # pointing to starting chunk
          toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
          chars = set()
          if first:
              # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
              chars = used_chars(html_tree.getroot())
              chars = used_chars(html_tree.getroot())
-            zip.writestr('OPS/title.html',
-                 etree.tostring(html_tree, pretty_print = True,
-                       xml_declaration = True,
-                    encoding = "utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
+            html_string = etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
+                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+            )
+            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
              # add a title page TOC entry
              toc.add(u"Strona tytułowa", "title.html")
          elif wldoc.book_info.parts:
              # add a title page TOC entry
              toc.add(u"Strona tytułowa", "title.html")
          elif wldoc.book_info.parts:
@@ -441,13 +458,13 @@ def transform(wldoc, verbose=False,
              else:
                  html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                  chars = used_chars(html_tree.getroot())
              else:
                  html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                  chars = used_chars(html_tree.getroot())
-                html_string = etree.tostring(html_tree, 
-                       pretty_print = True,
-                    xml_declaration = True,
-                    encoding = "utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">')
-            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
+                html_string = etree.tostring(
+                    html_tree, pretty_print=True, xml_declaration=True,
+                    encoding="utf-8",
+                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
+                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                )
+            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
              add_to_manifest(manifest, chunk_counter)
              add_to_spine(spine, chunk_counter)
              chunk_counter += 1
              add_to_manifest(manifest, chunk_counter)
              add_to_spine(spine, chunk_counter)
              chunk_counter += 1
@@ -473,7 +490,7 @@ def transform(wldoc, verbose=False,
  
                  toc.extend(chunk_toc)
                  chars = chars.union(chunk_chars)
  
                  toc.extend(chunk_toc)
                  chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
+                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
                  add_to_manifest(manifest, chunk_counter)
                  add_to_spine(spine, chunk_counter)
                  chunk_counter += 1
                  add_to_manifest(manifest, chunk_counter)
                  add_to_spine(spine, chunk_counter)
                  chunk_counter += 1
@@ -486,7 +503,6 @@ def transform(wldoc, verbose=False,
  
          return toc, chunk_counter, chars, sample
  
  
          return toc, chunk_counter, chars, sample
  
-
      document = deepcopy(wldoc)
      del wldoc
  
      document = deepcopy(wldoc)
      del wldoc
  
@@ -494,9 +510,14 @@ def transform(wldoc, verbose=False,
          for flag in flags:
              document.edoc.getroot().set(flag, 'yes')
  
          for flag in flags:
              document.edoc.getroot().set(flag, 'yes')
  
+    document.clean_ed_note()
+    document.clean_ed_note('abstrakt')
+
      # add editors info
      # add editors info
-    document.edoc.getroot().set('editors', u', '.join(sorted(
-        editor.readable() for editor in document.editors())))
+    editors = document.editors()
+    if editors:
+        document.edoc.getroot().set('editors', u', '.join(sorted(
+            editor.readable() for editor in editors)))
      if document.book_info.funders:
          document.edoc.getroot().set('funders', u', '.join(
              document.book_info.funders))
      if document.book_info.funders:
          document.edoc.getroot().set('funders', u', '.join(
              document.book_info.funders))
@@ -511,26 +532,42 @@ def transform(wldoc, verbose=False,
      output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
      zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
  
      output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
      zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
  
+    functions.reg_mathml_epub(zip)
+
+    if os.path.isdir(ilustr_path):
+        for i, filename in enumerate(os.listdir(ilustr_path)):
+            file_path = os.path.join(ilustr_path, filename)
+            zip.write(file_path, os.path.join('OPS', filename))
+            image_id = 'image%s' % i
+            manifest.append(etree.fromstring(
+                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+
      # write static elements
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
      mime.compress_type = zipfile.ZIP_STORED
      mime.extra = ''
      zip.writestr(mime, 'application/epub+zip')
      # write static elements
      mime = zipfile.ZipInfo()
      mime.filename = 'mimetype'
      mime.compress_type = zipfile.ZIP_STORED
      mime.extra = ''
      zip.writestr(mime, 'application/epub+zip')
-    zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
-                       'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
-                       '<rootfiles><rootfile full-path="OPS/content.opf" ' \
-                       'media-type="application/oebps-package+xml" />' \
-                       '</rootfiles></container>')
-    zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
-    zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
+    zip.writestr(
+        'META-INF/container.xml',
+        '<?xml version="1.0" ?>'
+        '<container version="1.0" '
+        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
+        '<rootfiles><rootfile full-path="OPS/content.opf" '
+        'media-type="application/oebps-package+xml" />'
+        '</rootfiles></container>'
+    )
+    zip.write(get_resource('res/wl-logo-small.png'),
+              os.path.join('OPS', 'logo_wolnelektury.png'))
+    zip.write(get_resource('res/jedenprocent.png'),
+              os.path.join('OPS', 'jedenprocent.png'))
      if not style:
          style = get_resource('epub/style.css')
      zip.write(style, os.path.join('OPS', 'style.css'))
  
      if cover:
          if cover is True:
      if not style:
          style = get_resource('epub/style.css')
      zip.write(style, os.path.join('OPS', 'style.css'))
  
      if cover:
          if cover is True:
-            cover = DefaultEbookCover
+            cover = make_cover
  
          cover_file = StringIO()
          bound_cover = cover(document.book_info)
  
          cover_file = StringIO()
          bound_cover = cover(document.book_info)
@@ -542,9 +579,11 @@ def transform(wldoc, verbose=False,
          cover_tree = etree.parse(get_resource('epub/cover.html'))
          cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
          zip.writestr('OPS/cover.html', etree.tostring(
          cover_tree = etree.parse(get_resource('epub/cover.html'))
          cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
          zip.writestr('OPS/cover.html', etree.tostring(
-                        cover_tree, pretty_print = True, xml_declaration = True, encoding = "utf-8",
-                       doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
+            cover_tree, pretty_print=True, xml_declaration=True,
+            encoding="utf-8",
+            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
+                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+        ))
  
          if bound_cover.uses_dc_cover:
              if document.book_info.cover_by:
  
          if bound_cover.uses_dc_cover:
              if document.book_info.cover_by:
@@ -560,14 +599,16 @@ def transform(wldoc, verbose=False,
          opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
          guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
  
          opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
          guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
  
-
      annotations = etree.Element('annotations')
  
      annotations = etree.Element('annotations')
  
-    toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
-                               '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
-                               '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
-                               'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
-                               '</navMap></ncx>')
+    toc_file = etree.fromstring(
+        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
+        '"-//NISO//DTD ncx 2005-1//EN" '
+        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
+        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
+        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
+        '</navMap></ncx>'
+    )
      nav_map = toc_file[-1]
  
      if html_toc:
      nav_map = toc_file[-1]
  
      if html_toc:
@@ -593,11 +634,11 @@ def transform(wldoc, verbose=False,
          html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
          chars = chars.union(used_chars(html_tree.getroot()))
          zip.writestr('OPS/annotations.html', etree.tostring(
          html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
          chars = chars.union(used_chars(html_tree.getroot()))
          zip.writestr('OPS/annotations.html', etree.tostring(
-                            html_tree, pretty_print = True,
-                               xml_declaration = True,
-                               encoding = "utf-8",
-                               doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
+            html_tree, pretty_print=True, xml_declaration=True,
+            encoding="utf-8",
+            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
+                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+        ))
  
      toc.add("Wesprzyj Wolne Lektury", "support.html")
      manifest.append(etree.fromstring(
  
      toc.add("Wesprzyj Wolne Lektury", "support.html")
      manifest.append(etree.fromstring(
@@ -606,23 +647,23 @@ def transform(wldoc, verbose=False,
          '<itemref idref="support" />'))
      html_string = open(get_resource('epub/support.html')).read()
      chars.update(used_chars(etree.fromstring(html_string)))
          '<itemref idref="support" />'))
      html_string = open(get_resource('epub/support.html')).read()
      chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', html_string)
+    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
  
      toc.add("Strona redakcyjna", "last.html")
      manifest.append(etree.fromstring(
          '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="last" />'))
  
      toc.add("Strona redakcyjna", "last.html")
      manifest.append(etree.fromstring(
          '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
      spine.append(etree.fromstring(
          '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
      chars.update(used_chars(html_tree.getroot()))
      chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', etree.tostring(
-                        html_tree, pretty_print = True,
-                       xml_declaration = True,
-                       encoding = "utf-8",
-                       doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                            '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
-
-    if not flags or not 'without-fonts' in flags:
+    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
+        html_tree, pretty_print=True, xml_declaration=True,
+        encoding="utf-8",
+        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
+                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+    )))
+
+    if not flags or 'without-fonts' not in flags:
          # strip fonts
          tmpdir = mkdtemp('-librarian-epub')
          try:
          # strip fonts
          tmpdir = mkdtemp('-librarian-epub')
          try:
@@ -632,22 +673,25 @@ def transform(wldoc, verbose=False,
  
          os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
          for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
  
          os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
          for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
-            optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
-                          get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]              
+            optimizer_call = ['perl', 'subset.pl', '--chars',
+                              ''.join(chars).encode('utf-8'),
+                              get_resource('fonts/' + fname),
+                              os.path.join(tmpdir, fname)]
+            env = {"PERL_USE_UNSAFE_INC": "1"}
              if verbose:
                  print "Running font-optimizer"
              if verbose:
                  print "Running font-optimizer"
-                subprocess.check_call(optimizer_call)
+                subprocess.check_call(optimizer_call, env=env)
              else:
              else:
-                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                dev_null = open(os.devnull, 'w')
+                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
              zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
              manifest.append(etree.fromstring(
                  '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
          rmtree(tmpdir)
          if cwd is not None:
              os.chdir(cwd)
              zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
              manifest.append(etree.fromstring(
                  '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
          rmtree(tmpdir)
          if cwd is not None:
              os.chdir(cwd)
-    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print = True,
-                    xml_declaration = True,
-                    encoding = "utf-8"))
+    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
+                 xml_declaration=True, encoding="utf-8"))
      title = document.book_info.title
      attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
      for st in attributes:
      title = document.book_info.title
      attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
      for st in attributes:
@@ -664,9 +708,8 @@ def transform(wldoc, verbose=False,
          toc.add(u"Spis treści", "toc.html", index=1)
          zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
      toc.write_to_xml(nav_map)
          toc.add(u"Spis treści", "toc.html", index=1)
          zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
      toc.write_to_xml(nav_map)
-    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print = True,
-                    xml_declaration = True,
-                    encoding = "utf-8"))
+    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
+                 xml_declaration=True, encoding="utf-8"))
      zip.close()
  
      return OutputFile.from_filename(output_file.name)
      zip.close()
  
      return OutputFile.from_filename(output_file.name)