New EPUB builder, other minor changes.

[librarian.git] / src / librarian / epub.py
diff --git a/src/librarian/epub.py b/src/librarian/epub.py

index be9488a..0fb91e5 100644 (file)
--- a/src/librarian/epub.py
+++ b/src/librarian/epub.py
@@ -9,16 +9,17 @@ import os
  import os.path
  import re
  import subprocess
  import os.path
  import re
  import subprocess
-from six import BytesIO
+import six
  from copy import deepcopy
  from mimetypes import guess_type
  
  from copy import deepcopy
  from mimetypes import guess_type
  
+from ebooklib import epub
  from lxml import etree
  from lxml import etree
-import zipfile
+from PIL import Image
  from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
  from tempfile import mkdtemp, NamedTemporaryFile
  from shutil import rmtree
  
-from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
+from librarian import RDFNS, WLNS, DCNS, OutputFile
  from librarian.cover import make_cover
  
  from librarian import functions, get_resource
  from librarian.cover import make_cover
  
  from librarian import functions, get_resource
@@ -26,29 +27,17 @@ from librarian import functions, get_resource
  from librarian.hyphenator import Hyphenator
  
  functions.reg_person_name()
  from librarian.hyphenator import Hyphenator
  
  functions.reg_person_name()
-functions.reg_lang_code_3to2()
  
  
  def squeeze_whitespace(s):
  
  
  def squeeze_whitespace(s):
+    return s
      return re.sub(b'\\s+', b' ', s)
  
  
  def set_hyph_language(source_tree):
      return re.sub(b'\\s+', b' ', s)
  
  
  def set_hyph_language(source_tree):
-    def get_short_lng_code(text):
-        result = ''
-        text = ''.join(text)
-        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f.read().decode('latin1').split('\n'):
-                list = line.strip().split('|')
-                if list[0] == text:
-                    result = list[2]
-        if result == '':
-            return text
-        else:
-            return result
      bibl_lng = etree.XPath('//dc:language//text()',
                             namespaces={'dc': str(DCNS)})(source_tree)
      bibl_lng = etree.XPath('//dc:language//text()',
                             namespaces={'dc': str(DCNS)})(source_tree)
-    short_lng = get_short_lng_code(bibl_lng[0])
+    short_lng = functions.lang_code_3to2(bibl_lng[0])
      try:
          return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                         short_lng + '.dic'))
      try:
          return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                         short_lng + '.dic'))
@@ -74,31 +63,6 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph):
              parent.tail = newt
  
  
              parent.tail = newt
  
  
-def inner_xml(node):
-    """ returns node's text and children as a string
-
-    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
-    x<b>y</b>z
-    """
-
-    nt = node.text if node.text is not None else ''
-    return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
-
-
-def set_inner_xml(node, text):
-    """ sets node's text and children from a string
-
-    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
-    >>> set_inner_xml(e, 'x<b>y</b>z')
-    >>> print(etree.tostring(e, encoding='unicode'))
-    <a>x<b>y</b>z</a>
-    """
-
-    p = etree.fromstring('<x>%s</x>' % text)
-    node.text = p.text
-    node[:] = p[:]
-
-
  def node_name(node):
      """ Find out a node's name
  
  def node_name(node):
      """ Find out a node's name
  
@@ -122,7 +86,10 @@ def xslt(xml, sheet, **kwargs):
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
          transform = etree.XSLT(etree.parse(xsltf))
          xml = etree.ElementTree(xml)
      with open(sheet) as xsltf:
          transform = etree.XSLT(etree.parse(xsltf))
-        params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
+        params = dict(
+            (key, transform.strparam(value))
+            for key, value in kwargs.items()
+        )
          return transform(xml, **params)
  
  
          return transform(xml, **params)
  
  
@@ -170,11 +137,17 @@ class Stanza(object):
      Slashes may only occur directly in the stanza. Any slashes in subelements
      will be ignored, and the subelements will be put inside verse elements.
  
      Slashes may only occur directly in the stanza. Any slashes in subelements
      will be ignored, and the subelements will be put inside verse elements.
  
-    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
+    >>> s = etree.fromstring(
+    ...         "<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>"
+    ...     )
      >>> Stanza(s).versify()
      >>> Stanza(s).versify()
-    >>> print(etree.tostring(s, encoding='unicode'))
-    <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
-    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
+    >>> print(etree.tostring(s, encoding='unicode', pretty_print=True).strip())
+    <strofa>
+      <wers_normalny>a <b>c</b><b>c</b></wers_normalny>
+      <wers_normalny>b<x>x/
+    y</x>c</wers_normalny>
+      <wers_normalny>d</wers_normalny>
+    </strofa>
  
      """
      def __init__(self, stanza_elem):
  
      """
      def __init__(self, stanza_elem):
@@ -190,7 +163,10 @@ class Stanza(object):
          tail = self.stanza.tail
          self.stanza.clear()
          self.stanza.tail = tail
          tail = self.stanza.tail
          self.stanza.clear()
          self.stanza.tail = tail
-        self.stanza.extend(verse for verse in self.verses if verse.text or len(verse) > 0)
+        self.stanza.extend(
+            verse for verse in self.verses
+            if verse.text or len(verse) > 0
+        )
  
      def open_normal_verse(self):
          self.open_verse = self.stanza.makeelement("wers_normalny")
  
      def open_normal_verse(self):
          self.open_verse = self.stanza.makeelement("wers_normalny")
@@ -235,101 +211,6 @@ def replace_by_verse(tree):
          Stanza(stanza).versify()
  
  
          Stanza(stanza).versify()
  
  
-def add_to_manifest(manifest, partno):
-    """ Adds a node to the manifest section in content.opf file """
-
-    partstr = 'part%d' % partno
-    e = manifest.makeelement(
-        OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
-                               'media-type': 'application/xhtml+xml'}
-    )
-    manifest.append(e)
-
-
-def add_to_spine(spine, partno):
-    """ Adds a node to the spine section in content.opf file """
-
-    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
-    spine.append(e)
-
-
-class TOC(object):
-    def __init__(self, name=None, part_href=None):
-        self.children = []
-        self.name = name
-        self.part_href = part_href
-        self.sub_number = None
-
-    def add(self, name, part_href, level=0, is_part=True, index=None):
-        assert level == 0 or index is None
-        if level > 0 and self.children:
-            return self.children[-1].add(name, part_href, level - 1, is_part)
-        else:
-            t = TOC(name)
-            t.part_href = part_href
-            if index is not None:
-                self.children.insert(index, t)
-            else:
-                self.children.append(t)
-            if not is_part:
-                t.sub_number = len(self.children) + 1
-                return t.sub_number
-
-    def append(self, toc):
-        self.children.append(toc)
-
-    def extend(self, toc):
-        self.children.extend(toc.children)
-
-    def depth(self):
-        if self.children:
-            return max((c.depth() for c in self.children)) + 1
-        else:
-            return 0
-
-    def href(self):
-        src = self.part_href
-        if self.sub_number is not None:
-            src += '#sub%d' % self.sub_number
-        return src
-
-    def write_to_xml(self, nav_map, counter=1):
-        for child in self.children:
-            nav_point = nav_map.makeelement(NCXNS('navPoint'))
-            nav_point.set('id', 'NavPoint-%d' % counter)
-            nav_point.set('playOrder', str(counter))
-
-            nav_label = nav_map.makeelement(NCXNS('navLabel'))
-            text = nav_map.makeelement(NCXNS('text'))
-            if child.name is not None:
-                text.text = re.sub(r'\n', ' ', child.name)
-            else:
-                text.text = child.name
-            nav_label.append(text)
-            nav_point.append(nav_label)
-
-            content = nav_map.makeelement(NCXNS('content'))
-            content.set('src', child.href())
-            nav_point.append(content)
-            nav_map.append(nav_point)
-            counter = child.write_to_xml(nav_point, counter + 1)
-        return counter
-
-    def html_part(self, depth=0):
-        texts = []
-        for child in self.children:
-            texts.append(
-                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
-                (depth, child.href(), child.name))
-            texts.append(child.html_part(depth + 1))
-        return "\n".join(texts)
-
-    def html(self):
-        with open(get_resource('epub/toc.html'), 'rb') as f:
-            t = f.read().decode('utf-8')
-        return t % self.html_part()
-
-
  def used_chars(element):
      """ Lists characters used in an ETree Element """
      chars = set((element.text or '') + (element.tail or ''))
  def used_chars(element):
      """ Lists characters used in an ETree Element """
      chars = set((element.text or '') + (element.tail or ''))
@@ -348,7 +229,8 @@ def chop(main_text):
  
      last_node_part = False
  
  
      last_node_part = False
  
-    # the below loop are workaround for a problem with epubs in drama ebooks without acts
+    # The loop below is a workaround for a problem with epubs
+    # in drama ebooks without acts.
      is_scene = False
      is_act = False
      for one_part in main_text:
      is_scene = False
      is_act = False
      for one_part in main_text:
@@ -376,7 +258,10 @@ def chop(main_text):
                  yield part_xml
                  last_node_part = True
                  main_xml_part[:] = [deepcopy(one_part)]
                  yield part_xml
                  last_node_part = True
                  main_xml_part[:] = [deepcopy(one_part)]
-            elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+            elif (not last_node_part
+                  and name in (
+                      "naglowek_rozdzial", "naglowek_akt", "srodtytul"
+                  )):
                  yield part_xml
                  main_xml_part[:] = [deepcopy(one_part)]
              else:
                  yield part_xml
                  main_xml_part[:] = [deepcopy(one_part)]
              else:
@@ -385,21 +270,63 @@ def chop(main_text):
      yield part_xml
  
  
      yield part_xml
  
  
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
-    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
+                    _empty_html_static=[]):
+    """
+    Transforms one chunk; returns an HTML string, a list of TOC entries
+    and a set of used characters.
+    """
  
  
-    toc = TOC()
+    toc = []
      for element in chunk_xml[0]:
          if element.tag == "naglowek_czesc":
      for element in chunk_xml[0]:
          if element.tag == "naglowek_czesc":
-            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
+            toc.append(
+                (
+                    epub.Link(
+                        "part%d.xhtml#book-text" % chunk_no,
+                        node_name(element),
+                        "part%d-text" % chunk_no
+                    ),
+                    []
+                )
+            )
          elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
          elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
-            toc.add(node_name(element), "part%d.html" % chunk_no)
+            toc.append(
+                (
+                    epub.Link(
+                        "part%d.xhtml" % chunk_no,
+                        node_name(element),
+                        "part%d" % chunk_no
+                    ),
+                    []
+                )
+            )
          elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
          elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
-            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
-            element.set('sub', str(subnumber))
+            if not toc:
+                toc.append(
+                    (
+                        epub.Link(
+                            "part%d.xhtml" % chunk_no,
+                            " ",
+                            "part%d" % chunk_no
+                        ),
+                        []
+                    )
+                )
+
+            subnumber = len(toc[-1][1])
+            toc[-1][1].append(
+                epub.Link(
+                    "part%d.xhtml#sub%d" % (chunk_no, subnumber),
+                    node_name(element),
+                    "part%d-sub%d" % (chunk_no, subnumber)
+                )
+            )
+            element.set('sub', six.text_type(subnumber))
      if empty:
          if not _empty_html_static:
      if empty:
          if not _empty_html_static:
-            _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
+            with open(get_resource('epub/emptyChunk.xhtml')) as f:
+                _empty_html_static.append(f.read())
          chars = set()
          output_html = _empty_html_static[0]
      else:
          chars = set()
          output_html = _empty_html_static[0]
      else:
@@ -410,63 +337,114 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
          output_html = etree.tostring(
              html_tree, pretty_print=True, xml_declaration=True,
              encoding="utf-8",
          output_html = etree.tostring(
              html_tree, pretty_print=True, xml_declaration=True,
              encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+            doctype='<!DOCTYPE html>'
          )
      return output_html, toc, chars
  
  
          )
      return output_html, toc, chars
  
  
-def transform(wldoc, verbose=False, style=None, html_toc=False,
-              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
-    """ produces a EPUB file
+def remove_empty_lists_from_toc(toc):
+    for i, e in enumerate(toc):
+        if isinstance(e, tuple):
+            if e[1]:
+                remove_empty_lists_from_toc(e[1])
+            else:
+                toc[i] = e[0]
+
  
  
-    sample=n: generate sample e-book (with at least n paragraphs)
-    cover: a cover.Cover factory or True for default
-    flags: less-advertising, without-fonts, working-copy
-    """
  
  
-    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
+def transform_file(wldoc, chunk_counter=1, first=True, sample=None, hyphenate=False, output_type='epub', spine=None, output=None, annotations=None):
          """ processes one input file and proceeds to its children """
  
          replace_characters(wldoc.edoc.getroot())
  
          """ processes one input file and proceeds to its children """
  
          replace_characters(wldoc.edoc.getroot())
  
-        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
+        hyphenator = set_hyph_language(
+            wldoc.edoc.getroot()
+        ) if hyphenate else None
          hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
  
          # every input file will have a TOC entry,
          # pointing to starting chunk
          hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
  
          # every input file will have a TOC entry,
          # pointing to starting chunk
-        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
+        toc = [
+            (
+                epub.Link(
+                    "part%d.xhtml" % chunk_counter,
+                    wldoc.book_info.title,
+                    "path%d-start" % chunk_counter
+                ),
+                []
+            )
+        ]
          chars = set()
          if first:
              # write book title page
          chars = set()
          if first:
              # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'),
+                             outputtype=output_type)
              chars = used_chars(html_tree.getroot())
              html_string = etree.tostring(
                  html_tree, pretty_print=True, xml_declaration=True,
                  encoding="utf-8",
              chars = used_chars(html_tree.getroot())
              html_string = etree.tostring(
                  html_tree, pretty_print=True, xml_declaration=True,
                  encoding="utf-8",
-                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                doctype='<!DOCTYPE html>'
+            )
+            item = epub.EpubItem(
+                uid="titlePage",
+                file_name="title.xhtml",
+                media_type="application/xhtml+xml",
+                content=squeeze_whitespace(html_string)
              )
              )
-            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
+            spine.append(item)
+            output.add_item(item)
              # add a title page TOC entry
              # add a title page TOC entry
-            toc.add(u"Strona tytułowa", "title.html")
+            toc[-1][1].append(
+                epub.Link(
+                    "title.xhtml",
+                    "Strona tytułowa",
+                    "title",
+                )
+            )
+
+            item = epub.EpubNav()
+            toc[-1][1].append(
+                epub.Link(
+                    "nav.xhtml",
+                    "Spis treści",
+                    "nav"
+                )
+            )
+            output.add_item(item)
+            spine.append(item)
+
+            toc[-1][1].append(
+                epub.Link(
+                    "part1.xhtml",
+                    "Początek utworu",
+                    "part1"
+                )
+            )
+
          elif wldoc.book_info.parts:
              # write title page for every parent
              if sample is not None and sample <= 0:
                  chars = set()
          elif wldoc.book_info.parts:
              # write title page for every parent
              if sample is not None and sample <= 0:
                  chars = set()
-                html_string = open(get_resource('epub/emptyChunk.html')).read()
+                html_string = open(
+                    get_resource('epub/emptyChunk.xhtml')).read()
              else:
              else:
-                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
+                html_tree = xslt(wldoc.edoc,
+                                 get_resource('epub/xsltChunkTitle.xsl'))
                  chars = used_chars(html_tree.getroot())
                  html_string = etree.tostring(
                      html_tree, pretty_print=True, xml_declaration=True,
                      encoding="utf-8",
                  chars = used_chars(html_tree.getroot())
                  html_string = etree.tostring(
                      html_tree, pretty_print=True, xml_declaration=True,
                      encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                    doctype='<!DOCTYPE html>'
                  )
                  )
-            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
-            add_to_manifest(manifest, chunk_counter)
-            add_to_spine(spine, chunk_counter)
+            item = epub.EpubItem(
+                uid="part%d" % chunk_counter,
+                file_name="part%d.xhtml" % chunk_counter,
+                media_type="application/xhtml+xml",
+                content=squeeze_whitespace(html_string)
+            )
+            output.add_item(item)
+            spine.append(item)
+
              chunk_counter += 1
  
          if len(wldoc.edoc.getroot()) > 1:
              chunk_counter += 1
  
          if len(wldoc.edoc.getroot()) > 1:
@@ -485,24 +463,47 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                      if sample <= 0:
                          empty = True
                      else:
                      if sample <= 0:
                          empty = True
                      else:
-                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
-                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
+                        sample -= len(chunk_xml.xpath(
+                            '//strofa|//akap|//akap_cd|//akap_dialog'
+                        ))
+                chunk_html, chunk_toc, chunk_chars = transform_chunk(
+                    chunk_xml, chunk_counter, annotations, empty)
  
  
-                toc.extend(chunk_toc)
+                toc[-1][1].extend(chunk_toc)
                  chars = chars.union(chunk_chars)
                  chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
-                add_to_manifest(manifest, chunk_counter)
-                add_to_spine(spine, chunk_counter)
+                item = epub.EpubItem(
+                    uid="part%d" % chunk_counter,
+                    file_name="part%d.xhtml" % chunk_counter,
+                    media_type="application/xhtml+xml",
+                    content=squeeze_whitespace(chunk_html)
+                )
+                output.add_item(item)
+                spine.append(item)
                  chunk_counter += 1
  
          for child in wldoc.parts():
              child_toc, chunk_counter, chunk_chars, sample = transform_file(
                  chunk_counter += 1
  
          for child in wldoc.parts():
              child_toc, chunk_counter, chunk_chars, sample = transform_file(
-                child, chunk_counter, first=False, sample=sample)
-            toc.append(child_toc)
+                child, chunk_counter, first=False, sample=sample,
+                hyphenate=hyphenate, output_type=output_type,
+                spine=spine, output=output, annotations=annotations,
+            )
+            toc[-1][1].extend(child_toc)
              chars = chars.union(chunk_chars)
  
          return toc, chunk_counter, chars, sample
  
              chars = chars.union(chunk_chars)
  
          return toc, chunk_counter, chars, sample
  
+                
+def transform(wldoc, verbose=False, style=None,
+              sample=None, cover=None, flags=None, hyphenate=False,
+              base_url='file://./', output_type='epub'):
+    """ produces an EPUB file
+
+    sample=n: generate sample e-book (with at least n paragraphs)
+    cover: a cover.Cover factory or True for default
+    flags: less-advertising, without-fonts, working-copy
+    """
+
+
      document = deepcopy(wldoc)
      del wldoc
  
      document = deepcopy(wldoc)
      del wldoc
  
@@ -524,147 +525,214 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
      if document.book_info.thanks:
          document.edoc.getroot().set('thanks', document.book_info.thanks)
  
      if document.book_info.thanks:
          document.edoc.getroot().set('thanks', document.book_info.thanks)
  
-    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
-    manifest = opf.find('.//' + OPFNS('manifest'))
-    guide = opf.find('.//' + OPFNS('guide'))
-    spine = opf.find('.//' + OPFNS('spine'))
+    output = epub.EpubBook()
+    output.set_identifier(six.text_type(document.book_info.url))
+    output.set_language(functions.lang_code_3to2(document.book_info.language))
+    output.set_title(document.book_info.title)
+    for i, author in enumerate(document.book_info.authors):
+        output.add_author(
+            author.readable(),
+            file_as=six.text_type(author),
+            uid='creator{}'.format(i)
+        )
+    for translator in document.book_info.translators:
+        output.add_author(
+            translator.readable(),
+            file_as=six.text_type(translator),
+            role='trl',
+            uid='translator{}'.format(i)
+        )
+    for publisher in document.book_info.publisher:
+        output.add_metadata("DC", "publisher", publisher)
+    output.add_metadata("DC", "date", document.book_info.created_at)
  
  
-    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
-    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+    output.guide.append({
+        "type": "text",
+        "title": "Początek",
+        "href": "part1.xhtml"
+    })
  
  
-    functions.reg_mathml_epub(zip)
+    output.add_item(epub.EpubNcx())
  
  
-    if os.path.isdir(ilustr_path):
-        ilustr_elements = set(ilustr.get('src') for ilustr in document.edoc.findall('//ilustr'))
-        for i, filename in enumerate(os.listdir(ilustr_path)):
-            if filename not in ilustr_elements:
-                continue
-            file_path = os.path.join(ilustr_path, filename)
-            zip.write(file_path, os.path.join('OPS', filename))
-            image_id = 'image%s' % i
-            manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+    spine = output.spine
+
+    functions.reg_mathml_epub(output)
+
+    # FIXME
+    for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
+        url = six.moves.urllib.parse.urljoin(
+            base_url,
+            ilustr.get('src')
+        )
+        imgfile = six.moves.urllib.request.urlopen(url)
+        img = Image.open(imgfile)
+
+        th_format, ext, media_type = {
+            'GIF': ('GIF', 'gif', 'image/gif'),
+            'PNG': ('PNG', 'png', 'image/png'),
+        }.get(img.format, ('JPEG', 'jpg', 'image/jpeg'))
  
  
+        width = 1200
+        if img.size[0] < width:
+            th = img
+        else:
+            th = img.resize((width, round(width * img.size[1] / img.size[0])))
+
+        imgfile.close()
+            
+        buffer = six.BytesIO()
+        th.save(buffer, format=th_format)
+
+        file_name = 'image%d.%s' % (i, ext)
+        ilustr.set('src', file_name)
+        output.add_item(
+            epub.EpubItem(
+                uid='image%s' % i,
+                file_name=file_name,
+                media_type=media_type,
+                content=buffer.getvalue()
+            )
+        )
+            
      # write static elements
      # write static elements
-    mime = zipfile.ZipInfo()
-    mime.filename = 'mimetype'
-    mime.compress_type = zipfile.ZIP_STORED
-    mime.extra = b''
-    zip.writestr(mime, b'application/epub+zip')
-    zip.writestr(
-        'META-INF/container.xml',
-        b'<?xml version="1.0" ?>'
-        b'<container version="1.0" '
-        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
-        b'<rootfiles><rootfile full-path="OPS/content.opf" '
-        b'media-type="application/oebps-package+xml" />'
-        b'</rootfiles></container>'
-    )
-    zip.write(get_resource('res/wl-logo-small.png'),
-              os.path.join('OPS', 'logo_wolnelektury.png'))
-    zip.write(get_resource('res/jedenprocent.png'),
-              os.path.join('OPS', 'jedenprocent.png'))
+
+    with open(get_resource('res/wl-logo-small.png'), 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="logo_wolnelektury.png",
+                file_name="logo_wolnelektury.png",
+                media_type="image/png",
+                content=f.read()
+            )
+        )
+    with open(get_resource('res/jedenprocent.png'), 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="jedenprocent",
+                file_name="jedenprocent.png",
+                media_type="image/png",
+                content=f.read()
+            )
+        )
+
      if not style:
          style = get_resource('epub/style.css')
      if not style:
          style = get_resource('epub/style.css')
-    zip.write(style, os.path.join('OPS', 'style.css'))
+    with open(style, 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="style",
+                file_name="style.css",
+                media_type="text/css",
+                content=f.read()
+            )
+        )
  
      if cover:
          if cover is True:
              cover = make_cover
  
  
      if cover:
          if cover is True:
              cover = make_cover
  
-        cover_file = BytesIO()
+        cover_file = six.BytesIO()
          bound_cover = cover(document.book_info)
          bound_cover.save(cover_file)
          cover_name = 'cover.%s' % bound_cover.ext()
          bound_cover = cover(document.book_info)
          bound_cover.save(cover_file)
          cover_name = 'cover.%s' % bound_cover.ext()
-        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
-        del cover_file
  
  
-        cover_tree = etree.parse(get_resource('epub/cover.html'))
-        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
-        zip.writestr('OPS/cover.html', etree.tostring(
-            cover_tree, pretty_print=True, xml_declaration=True,
-            encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-        ))
+        output.set_cover(
+            file_name=cover_name,
+            content=cover_file.getvalue(),
+        )
+        spine.append('cover')
+        output.guide.append({
+            "type": "cover",
+            "href": "cover.xhtml",
+            "title": "Okładka",
+        })
+
+        del cover_file
  
          if bound_cover.uses_dc_cover:
              if document.book_info.cover_by:
  
          if bound_cover.uses_dc_cover:
              if document.book_info.cover_by:
-                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+                document.edoc.getroot().set('data-cover-by',
+                                            document.book_info.cover_by)
              if document.book_info.cover_source:
              if document.book_info.cover_source:
-                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
-
-        manifest.append(etree.fromstring(
-            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
-        manifest.append(etree.fromstring(
-            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
-        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
-        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
-        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
+                document.edoc.getroot().set('data-cover-source',
+                                            document.book_info.cover_source)
  
      annotations = etree.Element('annotations')
  
  
      annotations = etree.Element('annotations')
  
-    toc_file = etree.fromstring(
-        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
-        b'"-//NISO//DTD ncx 2005-1//EN" '
-        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
-        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
-        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
-        b'</navMap></ncx>'
+    toc, chunk_counter, chars, sample = transform_file(
+        document, sample=sample,
+        hyphenate=hyphenate, output_type=output_type,
+        spine=spine, output=output, annotations=annotations
      )
      )
-    nav_map = toc_file[-1]
-
-    if html_toc:
-        manifest.append(etree.fromstring(
-            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
-        spine.append(etree.fromstring(
-            '<itemref idref="html_toc" />'))
-        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
-
-    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
-
-    if len(toc.children) < 2:
-        toc.add(u"Początek utworu", "part1.html")
+    output.toc = toc[0][1]
  
      # Last modifications in container files and EPUB creation
      if len(annotations) > 0:
  
      # Last modifications in container files and EPUB creation
      if len(annotations) > 0:
-        toc.add("Przypisy", "annotations.html")
-        manifest.append(etree.fromstring(
-            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
-        spine.append(etree.fromstring(
-            '<itemref idref="annotations" />'))
+        output.toc.append(
+            epub.Link(
+                "annotations.xhtml",
+                "Przypisy",
+                "annotations"
+            )
+        )
          replace_by_verse(annotations)
          html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
          chars = chars.union(used_chars(html_tree.getroot()))
          replace_by_verse(annotations)
          html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
          chars = chars.union(used_chars(html_tree.getroot()))
-        zip.writestr('OPS/annotations.html', etree.tostring(
-            html_tree, pretty_print=True, xml_declaration=True,
-            encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-        ))
  
  
-    toc.add("Wesprzyj Wolne Lektury", "support.html")
-    manifest.append(etree.fromstring(
-        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
-    spine.append(etree.fromstring(
-        '<itemref idref="support" />'))
-    html_string = open(get_resource('epub/support.html'), 'rb').read()
+        item = epub.EpubItem(
+            uid="annotations",
+            file_name="annotations.xhtml",
+            media_type="application/xhtml+xml",
+            content=etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html>'
+            )
+        )
+        output.add_item(item)
+        spine.append(item)
+
+    output.toc.append(
+        epub.Link(
+            "support.xhtml",
+            "Wesprzyj Wolne Lektury",
+            "support"
+        )
+    )
+    with open(get_resource('epub/support.xhtml'), 'rb') as f:
+        html_string = f.read()
      chars.update(used_chars(etree.fromstring(html_string)))
      chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
-
-    toc.add("Strona redakcyjna", "last.html")
-    manifest.append(etree.fromstring(
-        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
-    spine.append(etree.fromstring(
-        '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
+    item = epub.EpubItem(
+        uid="support",
+        file_name="support.xhtml",
+        media_type="application/xhtml+xml",
+        content=squeeze_whitespace(html_string)
+    )
+    output.add_item(item)
+    spine.append(item)
+
+    output.toc.append(
+        epub.Link(
+            "last.xhtml",
+            "Strona redakcyjna",
+            "last"
+        )
+    )
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
+                     outputtype=output_type)
      chars.update(used_chars(html_tree.getroot()))
      chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
-        html_tree, pretty_print=True, xml_declaration=True,
-        encoding="utf-8",
-        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-    )))
+    item = epub.EpubItem(
+        uid="last",
+        file_name="last.xhtml",
+        media_type="application/xhtml+xml",
+        content=squeeze_whitespace(etree.tostring(
+            html_tree, pretty_print=True, xml_declaration=True,
+            encoding="utf-8",
+            doctype='<!DOCTYPE html>'
+        ))
+    )
+    output.add_item(item)
+    spine.append(item)
  
      if not flags or 'without-fonts' not in flags:
          # strip fonts
  
      if not flags or 'without-fonts' not in flags:
          # strip fonts
@@ -674,8 +742,10 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
          except OSError:
              cwd = None
  
          except OSError:
              cwd = None
  
-        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
-        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
+        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                              'font-optimizer'))
+        for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
+                      'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
              optimizer_call = ['perl', 'subset.pl', '--chars',
                                ''.join(chars).encode('utf-8'),
                                get_resource('fonts/' + fname),
              optimizer_call = ['perl', 'subset.pl', '--chars',
                                ''.join(chars).encode('utf-8'),
                                get_resource('fonts/' + fname),
@@ -686,33 +756,26 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                  subprocess.check_call(optimizer_call, env=env)
              else:
                  dev_null = open(os.devnull, 'w')
                  subprocess.check_call(optimizer_call, env=env)
              else:
                  dev_null = open(os.devnull, 'w')
-                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
-            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
-            manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
+                subprocess.check_call(optimizer_call, stdout=dev_null,
+                                      stderr=dev_null, env=env)
+            with open(os.path.join(tmpdir, fname), 'rb') as f:
+                output.add_item(
+                    epub.EpubItem(
+                        uid=fname,
+                        file_name=fname,
+                        media_type="font/ttf",
+                        content=f.read()
+                    )
+                )
          rmtree(tmpdir)
          if cwd is not None:
              os.chdir(cwd)
          rmtree(tmpdir)
          if cwd is not None:
              os.chdir(cwd)
-    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
-                 xml_declaration=True, encoding="utf-8"))
-    title = document.book_info.title
-    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
-    for st in attributes:
-        meta = toc_file.makeelement(NCXNS('meta'))
-        meta.set('name', st)
-        meta.set('content', '0')
-        toc_file[0].append(meta)
-    toc_file[0][0].set('content', str(document.book_info.url))
-    toc_file[0][1].set('content', str(toc.depth()))
-    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
-
-    # write TOC
-    if html_toc:
-        toc.add(u"Spis treści", "toc.html", index=1)
-        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
-    toc.write_to_xml(nav_map)
-    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
-                 xml_declaration=True, encoding="utf-8"))
-    zip.close()
  
  
+    remove_empty_lists_from_toc(output.toc)
+    print(output.toc)
+
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
+                                     delete=False)
+    output_file.close()
+    epub.write_epub(output_file.name, output, {'epub3_landmark': False})
      return OutputFile.from_filename(output_file.name)
      return OutputFile.from_filename(output_file.name)