Fix validation: rebuild EPUB generation on ebooklib's EpubBook API instead of hand-rolled zipfile/OPF/NCX assembly, emit XHTML chunks with an HTML5 doctype, and resize illustrations with PIL.

diff --git a/src/librarian/epub.py b/src/librarian/epub.py
index be9488a..a3931b5 100644
--- a/src/librarian/epub.py
+++ b/src/librarian/epub.py
@@ -9,16 +9,17 @@ import os
 import os.path
 import re
 import subprocess
-from six import BytesIO
+import six
 from copy import deepcopy
 from mimetypes import guess_type
 
+from ebooklib import epub
 from lxml import etree
-import zipfile
+from PIL import Image
 from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
-from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
+from librarian import RDFNS, WLNS, DCNS, OutputFile
 from librarian.cover import make_cover
 
 from librarian import functions, get_resource
@@ -26,29 +27,17 @@ from librarian import functions, get_resource
 from librarian.hyphenator import Hyphenator
 
 functions.reg_person_name()
-functions.reg_lang_code_3to2()
 
 
 def squeeze_whitespace(s):
+    return s
     return re.sub(b'\\s+', b' ', s)
 
 
 def set_hyph_language(source_tree):
-    def get_short_lng_code(text):
-        result = ''
-        text = ''.join(text)
-        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f.read().decode('latin1').split('\n'):
-                list = line.strip().split('|')
-                if list[0] == text:
-                    result = list[2]
-        if result == '':
-            return text
-        else:
-            return result
     bibl_lng = etree.XPath('//dc:language//text()',
                            namespaces={'dc': str(DCNS)})(source_tree)
-    short_lng = get_short_lng_code(bibl_lng[0])
+    short_lng = functions.lang_code_3to2(bibl_lng[0])
     try:
         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                        short_lng + '.dic'))
@@ -74,31 +63,6 @@ def hyphenate_and_fix_conjunctions(source_tree, hyph):
             parent.tail = newt
 
 
-def inner_xml(node):
-    """ returns node's text and children as a string
-
-    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
-    x<b>y</b>z
-    """
-
-    nt = node.text if node.text is not None else ''
-    return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
-
-
-def set_inner_xml(node, text):
-    """ sets node's text and children from a string
-
-    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
-    >>> set_inner_xml(e, 'x<b>y</b>z')
-    >>> print(etree.tostring(e, encoding='unicode'))
-    <a>x<b>y</b>z</a>
-    """
-
-    p = etree.fromstring('<x>%s</x>' % text)
-    node.text = p.text
-    node[:] = p[:]
-
-
 def node_name(node):
     """ Find out a node's name
 
@@ -122,7 +86,10 @@ def xslt(xml, sheet, **kwargs):
         xml = etree.ElementTree(xml)
     with open(sheet) as xsltf:
         transform = etree.XSLT(etree.parse(xsltf))
-        params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
+        params = dict(
+            (key, transform.strparam(value))
+            for key, value in kwargs.items()
+        )
         return transform(xml, **params)
 
 
@@ -170,11 +137,17 @@ class Stanza(object):
     Slashes may only occur directly in the stanza. Any slashes in subelements
     will be ignored, and the subelements will be put inside verse elements.
 
-    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
+    >>> s = etree.fromstring(
+    ...         "<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>"
+    ...     )
     >>> Stanza(s).versify()
-    >>> print(etree.tostring(s, encoding='unicode'))
-    <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
-    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
+    >>> print(etree.tostring(s, encoding='unicode', pretty_print=True).strip())
+    <strofa>
+      <wers_normalny>a <b>c</b><b>c</b></wers_normalny>
+      <wers_normalny>b<x>x/
+    y</x>c</wers_normalny>
+      <wers_normalny>d</wers_normalny>
+    </strofa>
 
     """
     def __init__(self, stanza_elem):
@@ -190,7 +163,10 @@ class Stanza(object):
         tail = self.stanza.tail
         self.stanza.clear()
         self.stanza.tail = tail
-        self.stanza.extend(verse for verse in self.verses if verse.text or len(verse) > 0)
+        self.stanza.extend(
+            verse for verse in self.verses
+            if verse.text or len(verse) > 0
+        )
 
     def open_normal_verse(self):
         self.open_verse = self.stanza.makeelement("wers_normalny")
@@ -235,101 +211,6 @@ def replace_by_verse(tree):
         Stanza(stanza).versify()
 
 
-def add_to_manifest(manifest, partno):
-    """ Adds a node to the manifest section in content.opf file """
-
-    partstr = 'part%d' % partno
-    e = manifest.makeelement(
-        OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
-                               'media-type': 'application/xhtml+xml'}
-    )
-    manifest.append(e)
-
-
-def add_to_spine(spine, partno):
-    """ Adds a node to the spine section in content.opf file """
-
-    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
-    spine.append(e)
-
-
-class TOC(object):
-    def __init__(self, name=None, part_href=None):
-        self.children = []
-        self.name = name
-        self.part_href = part_href
-        self.sub_number = None
-
-    def add(self, name, part_href, level=0, is_part=True, index=None):
-        assert level == 0 or index is None
-        if level > 0 and self.children:
-            return self.children[-1].add(name, part_href, level - 1, is_part)
-        else:
-            t = TOC(name)
-            t.part_href = part_href
-            if index is not None:
-                self.children.insert(index, t)
-            else:
-                self.children.append(t)
-            if not is_part:
-                t.sub_number = len(self.children) + 1
-                return t.sub_number
-
-    def append(self, toc):
-        self.children.append(toc)
-
-    def extend(self, toc):
-        self.children.extend(toc.children)
-
-    def depth(self):
-        if self.children:
-            return max((c.depth() for c in self.children)) + 1
-        else:
-            return 0
-
-    def href(self):
-        src = self.part_href
-        if self.sub_number is not None:
-            src += '#sub%d' % self.sub_number
-        return src
-
-    def write_to_xml(self, nav_map, counter=1):
-        for child in self.children:
-            nav_point = nav_map.makeelement(NCXNS('navPoint'))
-            nav_point.set('id', 'NavPoint-%d' % counter)
-            nav_point.set('playOrder', str(counter))
-
-            nav_label = nav_map.makeelement(NCXNS('navLabel'))
-            text = nav_map.makeelement(NCXNS('text'))
-            if child.name is not None:
-                text.text = re.sub(r'\n', ' ', child.name)
-            else:
-                text.text = child.name
-            nav_label.append(text)
-            nav_point.append(nav_label)
-
-            content = nav_map.makeelement(NCXNS('content'))
-            content.set('src', child.href())
-            nav_point.append(content)
-            nav_map.append(nav_point)
-            counter = child.write_to_xml(nav_point, counter + 1)
-        return counter
-
-    def html_part(self, depth=0):
-        texts = []
-        for child in self.children:
-            texts.append(
-                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
-                (depth, child.href(), child.name))
-            texts.append(child.html_part(depth + 1))
-        return "\n".join(texts)
-
-    def html(self):
-        with open(get_resource('epub/toc.html'), 'rb') as f:
-            t = f.read().decode('utf-8')
-        return t % self.html_part()
-
-
 def used_chars(element):
     """ Lists characters used in an ETree Element """
     chars = set((element.text or '') + (element.tail or ''))
@@ -348,7 +229,8 @@ def chop(main_text):
 
     last_node_part = False
 
-    # the below loop are workaround for a problem with epubs in drama ebooks without acts
+    # The loop below is a workaround for a problem with epubs
+    # in drama ebooks without acts.
     is_scene = False
     is_act = False
     for one_part in main_text:
@@ -376,7 +258,10 @@ def chop(main_text):
                 yield part_xml
                 last_node_part = True
                 main_xml_part[:] = [deepcopy(one_part)]
-            elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
+            elif (not last_node_part
+                  and name in (
+                      "naglowek_rozdzial", "naglowek_akt", "srodtytul"
+                  )):
                 yield part_xml
                 main_xml_part[:] = [deepcopy(one_part)]
             else:
@@ -385,21 +270,63 @@ def chop(main_text):
     yield part_xml
 
 
-def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
-    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
+def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
+                    _empty_html_static=[]):
+    """
+    Transforms one chunk; returns an HTML string, a list of TOC entries
+    and a set of used characters.
+    """
 
-    toc = TOC()
+    toc = []
     for element in chunk_xml[0]:
         if element.tag == "naglowek_czesc":
-            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
+            toc.append(
+                (
+                    epub.Link(
+                        "part%d.xhtml#book-text" % chunk_no,
+                        node_name(element),
+                        "part%d-text" % chunk_no
+                    ),
+                    []
+                )
+            )
         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
-            toc.add(node_name(element), "part%d.html" % chunk_no)
+            toc.append(
+                (
+                    epub.Link(
+                        "part%d.xhtml" % chunk_no,
+                        node_name(element),
+                        "part%d" % chunk_no
+                    ),
+                    []
+                )
+            )
         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
-            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
-            element.set('sub', str(subnumber))
+            if not toc:
+                toc.append(
+                    (
+                        epub.Link(
+                            "part%d.xhtml" % chunk_no,
+                            " ",
+                            "part%d" % chunk_no
+                        ),
+                        []
+                    )
+                )
+
+            subnumber = len(toc[-1][1])
+            toc[-1][1].append(
+                epub.Link(
+                    "part%d.xhtml#sub%d" % (chunk_no, subnumber),
+                    node_name(element),
+                    "part%d-sub%d" % (chunk_no, subnumber)
+                )
+            )
+            element.set('sub', six.text_type(subnumber))
     if empty:
         if not _empty_html_static:
-            _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
+            with open(get_resource('epub/emptyChunk.xhtml')) as f:
+                _empty_html_static.append(f.read())
         chars = set()
         output_html = _empty_html_static[0]
     else:
@@ -410,63 +337,114 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s
         output_html = etree.tostring(
             html_tree, pretty_print=True, xml_declaration=True,
             encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+            doctype='<!DOCTYPE html>'
         )
     return output_html, toc, chars
 
 
-def transform(wldoc, verbose=False, style=None, html_toc=False,
-              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
-    """ produces a EPUB file
+def remove_empty_lists_from_toc(toc):
+    for i, e in enumerate(toc):
+        if isinstance(e, tuple):
+            if e[1]:
+                remove_empty_lists_from_toc(e[1])
+            else:
+                toc[i] = e[0]
+
 
-    sample=n: generate sample e-book (with at least n paragraphs)
-    cover: a cover.Cover factory or True for default
-    flags: less-advertising, without-fonts, working-copy
-    """
 
-    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
+def transform_file(wldoc, chunk_counter=1, first=True, sample=None,
+                   hyphenate=False, output_type='epub', spine=None,
+                   output=None, annotations=None):
         """ processes one input file and proceeds to its children """
 
         replace_characters(wldoc.edoc.getroot())
 
-        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
+        hyphenator = set_hyph_language(
+            wldoc.edoc.getroot()
+        ) if hyphenate else None
         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 
         # every input file will have a TOC entry,
         # pointing to starting chunk
-        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
+        toc = [
+            (
+                epub.Link(
+                    "part%d.xhtml" % chunk_counter,
+                    wldoc.book_info.title,
+                    "part%d-start" % chunk_counter
+                ),
+                []
+            )
+        ]
         chars = set()
         if first:
             # write book title page
-            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
+            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'),
+                             outputtype=output_type)
             chars = used_chars(html_tree.getroot())
             html_string = etree.tostring(
                 html_tree, pretty_print=True, xml_declaration=True,
                 encoding="utf-8",
-                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                doctype='<!DOCTYPE html>'
+            )
+            item = epub.EpubItem(
+                uid="titlePage",
+                file_name="title.xhtml",
+                media_type="application/xhtml+xml",
+                content=squeeze_whitespace(html_string)
             )
-            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
+            spine.append(item)
+            output.add_item(item)
             # add a title page TOC entry
-            toc.add(u"Strona tytułowa", "title.html")
+            toc[-1][1].append(
+                epub.Link(
+                    "title.xhtml",
+                    "Strona tytułowa",
+                    "title",
+                )
+            )
+
+            item = epub.EpubNav()
+            toc[-1][1].append(
+                epub.Link(
+                    "nav.xhtml",
+                    "Spis treści",
+                    "nav"
+                )
+            )
+            output.add_item(item)
+            spine.append(item)
+
+            toc[-1][1].append(
+                epub.Link(
+                    "part1.xhtml",
+                    "Początek utworu",
+                    "part1"
+                )
+            )
+
         elif wldoc.book_info.parts:
             # write title page for every parent
             if sample is not None and sample <= 0:
                 chars = set()
-                html_string = open(get_resource('epub/emptyChunk.html')).read()
+                html_string = open(
+                    get_resource('epub/emptyChunk.xhtml')).read()
             else:
-                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
+                html_tree = xslt(wldoc.edoc,
+                                 get_resource('epub/xsltChunkTitle.xsl'))
                 chars = used_chars(html_tree.getroot())
                 html_string = etree.tostring(
                     html_tree, pretty_print=True, xml_declaration=True,
                     encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                    doctype='<!DOCTYPE html>'
                 )
-            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
-            add_to_manifest(manifest, chunk_counter)
-            add_to_spine(spine, chunk_counter)
+            item = epub.EpubItem(
+                uid="part%d" % chunk_counter,
+                file_name="part%d.xhtml" % chunk_counter,
+                media_type="application/xhtml+xml",
+                content=squeeze_whitespace(html_string)
+            )
+            output.add_item(item)
+            spine.append(item)
+
             chunk_counter += 1
 
         if len(wldoc.edoc.getroot()) > 1:
@@ -485,24 +463,47 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                     if sample <= 0:
                         empty = True
                     else:
-                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
-                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
+                        sample -= len(chunk_xml.xpath(
+                            '//strofa|//akap|//akap_cd|//akap_dialog'
+                        ))
+                chunk_html, chunk_toc, chunk_chars = transform_chunk(
+                    chunk_xml, chunk_counter, annotations, empty)
 
-                toc.extend(chunk_toc)
+                toc[-1][1].extend(chunk_toc)
                 chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
-                add_to_manifest(manifest, chunk_counter)
-                add_to_spine(spine, chunk_counter)
+                item = epub.EpubItem(
+                    uid="part%d" % chunk_counter,
+                    file_name="part%d.xhtml" % chunk_counter,
+                    media_type="application/xhtml+xml",
+                    content=squeeze_whitespace(chunk_html)
+                )
+                output.add_item(item)
+                spine.append(item)
                 chunk_counter += 1
 
         for child in wldoc.parts():
             child_toc, chunk_counter, chunk_chars, sample = transform_file(
-                child, chunk_counter, first=False, sample=sample)
-            toc.append(child_toc)
+                child, chunk_counter, first=False, sample=sample,
+                hyphenate=hyphenate, output_type=output_type,
+                spine=spine, output=output, annotations=annotations,
+            )
+            toc[-1][1].extend(child_toc)
             chars = chars.union(chunk_chars)
 
         return toc, chunk_counter, chars, sample
 
+
+def transform(wldoc, verbose=False, style=None,
+              sample=None, cover=None, flags=None, hyphenate=False,
+              base_url='file://./', output_type='epub'):
+    """ produces an EPUB file
+
+    sample=n: generate sample e-book (with at least n paragraphs)
+    cover: a cover.Cover factory or True for default
+    flags: less-advertising, without-fonts, working-copy
+    base_url: base for resolving relative ilustr/@src paths
+    """
+
+
     document = deepcopy(wldoc)
     del wldoc
 
@@ -524,147 +525,214 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
     if document.book_info.thanks:
         document.edoc.getroot().set('thanks', document.book_info.thanks)
 
-    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
-    manifest = opf.find('.//' + OPFNS('manifest'))
-    guide = opf.find('.//' + OPFNS('guide'))
-    spine = opf.find('.//' + OPFNS('spine'))
+    output = epub.EpubBook()
+    output.set_identifier(six.text_type(document.book_info.url))
+    output.set_language(functions.lang_code_3to2(document.book_info.language))
+    output.set_title(document.book_info.title)
+    for i, author in enumerate(document.book_info.authors):
+        output.add_author(
+            author.readable(),
+            file_as=six.text_type(author),
+            uid='creator{}'.format(i)
+        )
+    for i, translator in enumerate(document.book_info.translators):
+        output.add_author(
+            translator.readable(),
+            file_as=six.text_type(translator),
+            role='trl',
+            uid='translator{}'.format(i)
+        )
+    for publisher in document.book_info.publisher:
+        output.add_metadata("DC", "publisher", publisher)
+    output.add_metadata("DC", "date", document.book_info.created_at)
 
-    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
-    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+    output.guide.append({
+        "type": "text",
+        "title": "Początek",
+        "href": "part1.xhtml"
+    })
 
-    functions.reg_mathml_epub(zip)
+    output.add_item(epub.EpubNcx())
 
-    if os.path.isdir(ilustr_path):
-        ilustr_elements = set(ilustr.get('src') for ilustr in document.edoc.findall('//ilustr'))
-        for i, filename in enumerate(os.listdir(ilustr_path)):
-            if filename not in ilustr_elements:
-                continue
-            file_path = os.path.join(ilustr_path, filename)
-            zip.write(file_path, os.path.join('OPS', filename))
-            image_id = 'image%s' % i
-            manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
+    spine = output.spine
+
+    functions.reg_mathml_epub(output)
+
+    # FIXME
+    for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
+        url = six.moves.urllib.parse.urljoin(
+            base_url,
+            ilustr.get('src')
+        )
+        imgfile = six.moves.urllib.request.urlopen(url)
+        img = Image.open(imgfile)
+
+        th_format, ext, media_type = {
+            'GIF': ('GIF', 'gif', 'image/gif'),
+            'PNG': ('PNG', 'png', 'image/png'),
+        }.get(img.format, ('JPEG', 'jpg', 'image/jpeg'))
 
+        width = 1200
+        if img.size[0] < width:
+            th = img
+        else:
+            th = img.resize((width, round(width * img.size[1] / img.size[0])))
+
+        imgfile.close()
+
+        buffer = six.BytesIO()
+        th.save(buffer, format=th_format)
+
+        file_name = 'image%d.%s' % (i, ext)
+        ilustr.set('src', file_name)
+        output.add_item(
+            epub.EpubItem(
+                uid='image%s' % i,
+                file_name=file_name,
+                media_type=media_type,
+                content=buffer.getvalue()
+            )
+        )
+
     # write static elements
-    mime = zipfile.ZipInfo()
-    mime.filename = 'mimetype'
-    mime.compress_type = zipfile.ZIP_STORED
-    mime.extra = b''
-    zip.writestr(mime, b'application/epub+zip')
-    zip.writestr(
-        'META-INF/container.xml',
-        b'<?xml version="1.0" ?>'
-        b'<container version="1.0" '
-        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
-        b'<rootfiles><rootfile full-path="OPS/content.opf" '
-        b'media-type="application/oebps-package+xml" />'
-        b'</rootfiles></container>'
-    )
-    zip.write(get_resource('res/wl-logo-small.png'),
-              os.path.join('OPS', 'logo_wolnelektury.png'))
-    zip.write(get_resource('res/jedenprocent.png'),
-              os.path.join('OPS', 'jedenprocent.png'))
+
+    with open(get_resource('res/wl-logo-small.png'), 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="logo_wolnelektury.png",
+                file_name="logo_wolnelektury.png",
+                media_type="image/png",
+                content=f.read()
+            )
+        )
+    with open(get_resource('res/jedenprocent.png'), 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="jedenprocent",
+                file_name="jedenprocent.png",
+                media_type="image/png",
+                content=f.read()
+            )
+        )
+
     if not style:
         style = get_resource('epub/style.css')
-    zip.write(style, os.path.join('OPS', 'style.css'))
+    with open(style, 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="style",
+                file_name="style.css",
+                media_type="text/css",
+                content=f.read()
+            )
+        )
 
     if cover:
         if cover is True:
             cover = make_cover
 
-        cover_file = BytesIO()
+        cover_file = six.BytesIO()
         bound_cover = cover(document.book_info)
         bound_cover.save(cover_file)
         cover_name = 'cover.%s' % bound_cover.ext()
-        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
-        del cover_file
 
-        cover_tree = etree.parse(get_resource('epub/cover.html'))
-        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
-        zip.writestr('OPS/cover.html', etree.tostring(
-            cover_tree, pretty_print=True, xml_declaration=True,
-            encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-        ))
+        output.set_cover(
+            file_name=cover_name,
+            content=cover_file.getvalue(),
+        )
+        spine.append('cover')
+        output.guide.append({
+            "type": "cover",
+            "href": "cover.xhtml",
+            "title": "Okładka",
+        })
+
+        del cover_file
 
         if bound_cover.uses_dc_cover:
             if document.book_info.cover_by:
-                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
+                document.edoc.getroot().set('data-cover-by',
+                                            document.book_info.cover_by)
             if document.book_info.cover_source:
-                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
-
-        manifest.append(etree.fromstring(
-            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
-        manifest.append(etree.fromstring(
-            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
-        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
-        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
-        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
+                document.edoc.getroot().set('data-cover-source',
+                                            document.book_info.cover_source)
 
     annotations = etree.Element('annotations')
 
-    toc_file = etree.fromstring(
-        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
-        b'"-//NISO//DTD ncx 2005-1//EN" '
-        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
-        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
-        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
-        b'</navMap></ncx>'
+    toc, chunk_counter, chars, sample = transform_file(
+        document, sample=sample,
+        hyphenate=hyphenate, output_type=output_type,
+        spine=spine, output=output, annotations=annotations
     )
-    nav_map = toc_file[-1]
-
-    if html_toc:
-        manifest.append(etree.fromstring(
-            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
-        spine.append(etree.fromstring(
-            '<itemref idref="html_toc" />'))
-        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
-
-    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
-
-    if len(toc.children) < 2:
-        toc.add(u"Początek utworu", "part1.html")
+    output.toc = toc[0][1]
 
     # Last modifications in container files and EPUB creation
     if len(annotations) > 0:
-        toc.add("Przypisy", "annotations.html")
-        manifest.append(etree.fromstring(
-            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
-        spine.append(etree.fromstring(
-            '<itemref idref="annotations" />'))
+        output.toc.append(
+            epub.Link(
+                "annotations.xhtml",
+                "Przypisy",
+                "annotations"
+            )
+        )
         replace_by_verse(annotations)
         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
         chars = chars.union(used_chars(html_tree.getroot()))
-        zip.writestr('OPS/annotations.html', etree.tostring(
-            html_tree, pretty_print=True, xml_declaration=True,
-            encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-        ))
 
-    toc.add("Wesprzyj Wolne Lektury", "support.html")
-    manifest.append(etree.fromstring(
-        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
-    spine.append(etree.fromstring(
-        '<itemref idref="support" />'))
-    html_string = open(get_resource('epub/support.html'), 'rb').read()
+        item = epub.EpubItem(
+            uid="annotations",
+            file_name="annotations.xhtml",
+            media_type="application/xhtml+xml",
+            content=etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html>'
+            )
+        )
+        output.add_item(item)
+        spine.append(item)
+
+    output.toc.append(
+        epub.Link(
+            "support.xhtml",
+            "Wesprzyj Wolne Lektury",
+            "support"
+        )
+    )
+    with open(get_resource('epub/support.xhtml'), 'rb') as f:
+        html_string = f.read()
     chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
-
-    toc.add("Strona redakcyjna", "last.html")
-    manifest.append(etree.fromstring(
-        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
-    spine.append(etree.fromstring(
-        '<itemref idref="last" />'))
-    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
+    item = epub.EpubItem(
+        uid="support",
+        file_name="support.xhtml",
+        media_type="application/xhtml+xml",
+        content=squeeze_whitespace(html_string)
+    )
+    output.add_item(item)
+    spine.append(item)
+
+    output.toc.append(
+        epub.Link(
+            "last.xhtml",
+            "Strona redakcyjna",
+            "last"
+        )
+    )
+    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
+                     outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
-        html_tree, pretty_print=True, xml_declaration=True,
-        encoding="utf-8",
-        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-    )))
+    item = epub.EpubItem(
+        uid="last",
+        file_name="last.xhtml",
+        media_type="application/xhtml+xml",
+        content=squeeze_whitespace(etree.tostring(
+            html_tree, pretty_print=True, xml_declaration=True,
+            encoding="utf-8",
+            doctype='<!DOCTYPE html>'
+        ))
+    )
+    output.add_item(item)
+    spine.append(item)
 
     if not flags or 'without-fonts' not in flags:
         # strip fonts
@@ -674,8 +742,10 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
         except OSError:
             cwd = None
 
-        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
-        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
+        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                              'font-optimizer'))
+        for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
+                      'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
             optimizer_call = ['perl', 'subset.pl', '--chars',
                               ''.join(chars).encode('utf-8'),
                               get_resource('fonts/' + fname),
@@ -686,33 +756,25 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 subprocess.check_call(optimizer_call, env=env)
             else:
                 dev_null = open(os.devnull, 'w')
-                subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
-            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
-            manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
+                subprocess.check_call(optimizer_call, stdout=dev_null,
+                                      stderr=dev_null, env=env)
+            with open(os.path.join(tmpdir, fname), 'rb') as f:
+                output.add_item(
+                    epub.EpubItem(
+                        uid=fname,
+                        file_name=fname,
+                        media_type="font/ttf",
+                        content=f.read()
+                    )
+                )
         rmtree(tmpdir)
         if cwd is not None:
             os.chdir(cwd)
-    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
-                 xml_declaration=True, encoding="utf-8"))
-    title = document.book_info.title
-    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
-    for st in attributes:
-        meta = toc_file.makeelement(NCXNS('meta'))
-        meta.set('name', st)
-        meta.set('content', '0')
-        toc_file[0].append(meta)
-    toc_file[0][0].set('content', str(document.book_info.url))
-    toc_file[0][1].set('content', str(toc.depth()))
-    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
-
-    # write TOC
-    if html_toc:
-        toc.add(u"Spis treści", "toc.html", index=1)
-        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
-    toc.write_to_xml(nav_map)
-    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
-                 xml_declaration=True, encoding="utf-8"))
-    zip.close()
 
+    remove_empty_lists_from_toc(output.toc)
+
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
+                                     delete=False)
+    output_file.close()
+    epub.write_epub(output_file.name, output, {'epub3_landmark': False})
     return OutputFile.from_filename(output_file.name)
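
For orientation, the ebooklib flow this change adopts boils down to the minimal
sketch below. It is illustrative only: the identifier, title, author, file name
and content are made-up placeholders rather than values used by librarian, and
it sticks to API calls that also appear in the diff above.

    from ebooklib import epub

    book = epub.EpubBook()
    book.set_identifier('urn:example:book-1')   # placeholder identifier
    book.set_title('Example title')             # placeholder title
    book.set_language('pl')
    book.add_author('Example Author')           # placeholder author

    # Each content document is added as an item; the spine sets reading order.
    chapter = epub.EpubItem(
        uid='part1',
        file_name='part1.xhtml',
        media_type='application/xhtml+xml',
        content=b'<?xml version="1.0"?><!DOCTYPE html>'
                b'<html xmlns="http://www.w3.org/1999/xhtml">'
                b'<body><h1>Part 1</h1></body></html>'
    )
    book.add_item(chapter)
    book.add_item(epub.EpubNcx())   # EPUB 2 table of contents (toc.ncx)
    book.add_item(epub.EpubNav())   # EPUB 3 navigation document (nav.xhtml)
    book.spine = [chapter]
    book.toc = [epub.Link('part1.xhtml', 'Part 1', 'part1')]

    # Same option the commit passes to suppress the EPUB 3 landmarks section.
    epub.write_epub('example.epub', book, {'epub3_landmark': False})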