EPUB3 support.
author Radek Czajka <rczajka@rczajka.pl>
Fri, 19 Jun 2020 15:02:51 +0000 (17:02 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Fri, 19 Jun 2020 15:02:51 +0000 (17:02 +0200)
20 files changed:
scripts/book2epub
setup.py
src/librarian/book2anything.py
src/librarian/epub.py
src/librarian/epub/cover.html [deleted file]
src/librarian/epub/emptyChunk.html [deleted file]
src/librarian/epub/emptyChunk.xhtml [new file with mode: 0644]
src/librarian/epub/support.html [deleted file]
src/librarian/epub/support.xhtml [new file with mode: 0644]
src/librarian/epub/toc.html [deleted file]
src/librarian/epub/toc.xhtml [new file with mode: 0644]
src/librarian/epub/xsltAnnotations.xsl
src/librarian/epub/xsltChunkTitle.xsl
src/librarian/epub/xsltContent.xsl [deleted file]
src/librarian/epub/xsltLast.xsl
src/librarian/epub/xsltScheme.xsl
src/librarian/epub/xsltTitle.xsl
src/librarian/functions.py
src/librarian/mobi.py
tests/test_epub.py

index 5b906b9..c1027c5 100755 (executable)
--- a/scripts/book2epub
+++ b/scripts/book2epub
@@ -19,11 +19,6 @@ class Book2Epub(Book2Anything):
                 action='store_true', default=False,
                 help='mark the output as a working copy')
         ]
-    transform_options = [
-        Option('-t', '--html-toc',
-                action='store_true', dest='html_toc', default=False,
-                help='with inline html toc')
-        ]
 
 
 if __name__ == '__main__':
index 53cc182..0466e08 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,7 @@ setup(
         'Pillow',
         'six',
         'texml',
+        'ebooklib',
     ],
     scripts=['scripts/book2html',
              'scripts/book2txt',
index d954ce6..a1d5687 100755 (executable)
--- a/src/librarian/book2anything.py
+++ b/src/librarian/book2anything.py
@@ -11,7 +11,7 @@ import optparse
 import six
 from librarian import DirDocProvider, ParseError
 from librarian.parser import WLDocument
-from librarian.cover import make_cover
+from librarian.cover import make_cover, COVER_CLASSES
 
 
 class Option(object):
@@ -82,6 +82,10 @@ class Book2Anything(object):
                 help='prefix for image download cache'
                 + (' (implies --with-cover)' if cls.cover_optional else '')
             )
+            parser.add_option(
+                '--cover-class', dest='cover_class',
+                help='cover class name'
+            )
         for option in (
                 cls.parser_options
                 + cls.transform_options
@@ -118,11 +122,14 @@ class Book2Anything(object):
                 def cover_class(book_info, *args, **kwargs):
                     return make_cover(
                         book_info, image_cache=options.image_cache,
+                        cover_class=options.cover_class,
                         *args, **kwargs
                     )
                 transform_args['cover'] = cover_class
             elif not cls.cover_optional or options.with_cover:
-                transform_args['cover'] = make_cover
+                cover_class = COVER_CLASSES.get(
+                    options.cover_class, make_cover)
+                transform_args['cover'] = cover_class
 
         # Do some real work
         try:
index 137796e..a8c6680 100644 (file)
--- a/src/librarian/epub.py
+++ b/src/librarian/epub.py
@@ -9,16 +9,16 @@ import os
 import os.path
 import re
 import subprocess
-from six import BytesIO
+import six
 from copy import deepcopy
 from mimetypes import guess_type
 
+from ebooklib import epub
 from lxml import etree
-import zipfile
 from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 
-from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
+from librarian import RDFNS, WLNS, DCNS, OutputFile
 from librarian.cover import make_cover
 
 from librarian import functions, get_resource
@@ -26,7 +26,6 @@ from librarian import functions, get_resource
 from librarian.hyphenator import Hyphenator
 
 functions.reg_person_name()
-functions.reg_lang_code_3to2()
 
 
 def squeeze_whitespace(s):
@@ -34,21 +33,9 @@ def squeeze_whitespace(s):
 
 
 def set_hyph_language(source_tree):
-    def get_short_lng_code(text):
-        result = ''
-        text = ''.join(text)
-        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f.read().decode('latin1').split('\n'):
-                list = line.strip().split('|')
-                if list[0] == text:
-                    result = list[2]
-        if result == '':
-            return text
-        else:
-            return result
     bibl_lng = etree.XPath('//dc:language//text()',
                            namespaces={'dc': str(DCNS)})(source_tree)
-    short_lng = get_short_lng_code(bibl_lng[0])
+    short_lng = functions.lang_code_3to2(bibl_lng[0])
     try:
         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                        short_lng + '.dic'))
@@ -249,104 +236,6 @@ def replace_by_verse(tree):
         Stanza(stanza).versify()
 
 
-def add_to_manifest(manifest, partno):
-    """ Adds a node to the manifest section in content.opf file """
-
-    partstr = 'part%d' % partno
-    e = manifest.makeelement(
-        OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
-                               'media-type': 'application/xhtml+xml'}
-    )
-    manifest.append(e)
-
-
-def add_to_spine(spine, partno):
-    """ Adds a node to the spine section in content.opf file """
-
-    e = spine.makeelement(
-        OPFNS('itemref'),
-        attrib={'idref': 'part%d' % partno}
-    )
-    spine.append(e)
-
-
-class TOC(object):
-    def __init__(self, name=None, part_href=None):
-        self.children = []
-        self.name = name
-        self.part_href = part_href
-        self.sub_number = None
-
-    def add(self, name, part_href, level=0, is_part=True, index=None):
-        assert level == 0 or index is None
-        if level > 0 and self.children:
-            return self.children[-1].add(name, part_href, level - 1, is_part)
-        else:
-            t = TOC(name)
-            t.part_href = part_href
-            if index is not None:
-                self.children.insert(index, t)
-            else:
-                self.children.append(t)
-            if not is_part:
-                t.sub_number = len(self.children) + 1
-                return t.sub_number
-
-    def append(self, toc):
-        self.children.append(toc)
-
-    def extend(self, toc):
-        self.children.extend(toc.children)
-
-    def depth(self):
-        if self.children:
-            return max((c.depth() for c in self.children)) + 1
-        else:
-            return 0
-
-    def href(self):
-        src = self.part_href
-        if self.sub_number is not None:
-            src += '#sub%d' % self.sub_number
-        return src
-
-    def write_to_xml(self, nav_map, counter=1):
-        for child in self.children:
-            nav_point = nav_map.makeelement(NCXNS('navPoint'))
-            nav_point.set('id', 'NavPoint-%d' % counter)
-            nav_point.set('playOrder', str(counter))
-
-            nav_label = nav_map.makeelement(NCXNS('navLabel'))
-            text = nav_map.makeelement(NCXNS('text'))
-            if child.name is not None:
-                text.text = re.sub(r'\n', ' ', child.name)
-            else:
-                text.text = child.name
-            nav_label.append(text)
-            nav_point.append(nav_label)
-
-            content = nav_map.makeelement(NCXNS('content'))
-            content.set('src', child.href())
-            nav_point.append(content)
-            nav_map.append(nav_point)
-            counter = child.write_to_xml(nav_point, counter + 1)
-        return counter
-
-    def html_part(self, depth=0):
-        texts = []
-        for child in self.children:
-            texts.append(
-                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
-                (depth, child.href(), child.name))
-            texts.append(child.html_part(depth + 1))
-        return "\n".join(texts)
-
-    def html(self):
-        with open(get_resource('epub/toc.html'), 'rb') as f:
-            t = f.read().decode('utf-8')
-        return t % self.html_part()
-
-
 def used_chars(element):
     """ Lists characters used in an ETree Element """
     chars = set((element.text or '') + (element.tail or ''))
@@ -413,19 +302,43 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
     and a set of used characters.
     """
 
-    toc = TOC()
+    toc = []
     for element in chunk_xml[0]:
         if element.tag == "naglowek_czesc":
-            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
+            toc.append(
+                (
+                    epub.Link(
+                        "part%d.xhtml#book-text" % chunk_no,
+                        node_name(element),
+                        "part%d-text" % chunk_no
+                    ),
+                    []
+                )
+            )
         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
-            toc.add(node_name(element), "part%d.html" % chunk_no)
+            toc.append(
+                (
+                    epub.Link(
+                        "part%d.xhtml" % chunk_no,
+                        node_name(element),
+                        "part%d" % chunk_no
+                    ),
+                    []
+                )
+            )
         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
-            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no,
-                                level=1, is_part=False)
-            element.set('sub', str(subnumber))
+            subnumber = len(toc[-1][1])
+            toc[-1][1].append(
+                epub.Link(
+                    "part%d.xhtml#sub%d" % (chunk_no, subnumber),
+                    node_name(element),
+                    "part%d-sub%d" % (chunk_no, subnumber)
+                )
+            )
+            element.set('sub', six.text_type(subnumber))
     if empty:
         if not _empty_html_static:
-            with open(get_resource('epub/emptyChunk.html')) as f:
+            with open(get_resource('epub/emptyChunk.xhtml')) as f:
                 _empty_html_static.append(f.read())
         chars = set()
         output_html = _empty_html_static[0]
@@ -437,13 +350,21 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
         output_html = etree.tostring(
             html_tree, pretty_print=True, xml_declaration=True,
             encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+            doctype='<!DOCTYPE html>'
         )
     return output_html, toc, chars
 
 
-def transform(wldoc, verbose=False, style=None, html_toc=False,
+def remove_empty_lists_from_toc(toc):
+    for i, e in enumerate(toc):
+        if isinstance(e, tuple):
+            if e[1]:
+                remove_empty_lists_from_toc(e[1])
+            else:
+                toc[i] = e[0]
+
+
+def transform(wldoc, verbose=False, style=None,
               sample=None, cover=None, flags=None, hyphenate=False,
               ilustr_path='', output_type='epub'):
     """ produces a EPUB file
@@ -465,7 +386,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
 
         # every input file will have a TOC entry,
         # pointing to starting chunk
-        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
+        toc = [
+            (
+                epub.Link(
+                    "part%d.xhtml" % chunk_counter,
+                    wldoc.book_info.title,
+                    "path%d-start" % chunk_counter
+                ),
+                []
+            )
+        ]
         chars = set()
         if first:
             # write book title page
@@ -475,17 +405,42 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
             html_string = etree.tostring(
                 html_tree, pretty_print=True, xml_declaration=True,
                 encoding="utf-8",
-                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
-                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                doctype='<!DOCTYPE html>'
+            )
+            item = epub.EpubItem(
+                uid="titlePage",
+                file_name="title.xhtml",
+                media_type="application/xhtml+xml",
+                content=squeeze_whitespace(html_string)
             )
-            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
+            spine.append(item)
+            output.add_item(item)
             # add a title page TOC entry
-            toc.add(u"Strona tytułowa", "title.html")
+            toc[-1][1].append(
+                epub.Link(
+                    "title.xhtml",
+                    "Strona tytułowa",
+                    "title",
+                )
+            )
+
+            item = epub.EpubNav()
+            toc[-1][1].append(
+                epub.Link(
+                    "nav.xhtml",
+                    "Spis treści",
+                    "nav"
+                )
+            )
+            output.add_item(item)
+            spine.append(item)
+
         elif wldoc.book_info.parts:
             # write title page for every parent
             if sample is not None and sample <= 0:
                 chars = set()
-                html_string = open(get_resource('epub/emptyChunk.html')).read()
+                html_string = open(
+                    get_resource('epub/emptyChunk.xhtml')).read()
             else:
                 html_tree = xslt(wldoc.edoc,
                                  get_resource('epub/xsltChunkTitle.xsl'))
@@ -493,13 +448,17 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 html_string = etree.tostring(
                     html_tree, pretty_print=True, xml_declaration=True,
                     encoding="utf-8",
-                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'
-                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
+                    doctype='<!DOCTYPE html>'
                 )
-            zip.writestr('OPS/part%d.html' % chunk_counter,
-                         squeeze_whitespace(html_string))
-            add_to_manifest(manifest, chunk_counter)
-            add_to_spine(spine, chunk_counter)
+            item = epub.EpubItem(
+                uid="part%d" % chunk_counter,
+                file_name="part%d.xhtml" % chunk_counter,
+                media_type="application/xhtml+xml",
+                content=squeeze_whitespace(html_string)
+            )
+            output.add_item(item)
+            spine.append(item)
+
             chunk_counter += 1
 
         if len(wldoc.edoc.getroot()) > 1:
@@ -524,18 +483,22 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 chunk_html, chunk_toc, chunk_chars = transform_chunk(
                     chunk_xml, chunk_counter, annotations, empty)
 
-                toc.extend(chunk_toc)
+                toc[-1][1].extend(chunk_toc)
                 chars = chars.union(chunk_chars)
-                zip.writestr('OPS/part%d.html' % chunk_counter,
-                             squeeze_whitespace(chunk_html))
-                add_to_manifest(manifest, chunk_counter)
-                add_to_spine(spine, chunk_counter)
+                item = epub.EpubItem(
+                    uid="part%d" % chunk_counter,
+                    file_name="part%d.xhtml" % chunk_counter,
+                    media_type="application/xhtml+xml",
+                    content=squeeze_whitespace(chunk_html)
+                )
+                output.add_item(item)
+                spine.append(item)
                 chunk_counter += 1
 
         for child in wldoc.parts():
             child_toc, chunk_counter, chunk_chars, sample = transform_file(
                 child, chunk_counter, first=False, sample=sample)
-            toc.append(child_toc)
+            toc[-1][1].extend(child_toc)
             chars = chars.union(chunk_chars)
 
         return toc, chunk_counter, chars, sample
@@ -561,15 +524,34 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
     if document.book_info.thanks:
         document.edoc.getroot().set('thanks', document.book_info.thanks)
 
-    opf = xslt(document.book_info.to_etree(),
-               get_resource('epub/xsltContent.xsl'))
-    manifest = opf.find('.//' + OPFNS('manifest'))
-    guide = opf.find('.//' + OPFNS('guide'))
-    spine = opf.find('.//' + OPFNS('spine'))
+    output = epub.EpubBook()
+    output.set_identifier(six.text_type(document.book_info.url))
+    output.set_language(functions.lang_code_3to2(document.book_info.language))
+    output.set_title(document.book_info.title)
+    for author in document.book_info.authors:
+        output.add_author(
+            author.readable(),
+            file_as=six.text_type(author)
+        )
+    for translator in document.book_info.translators:
+        output.add_author(
+            translator.readable(),
+            file_as=six.text_type(translator),
+            role='translator'
+        )
+    for publisher in document.book_info.publisher:
+        output.add_metadata("DC", "publisher", publisher)
+    output.add_metadata("DC", "date", document.book_info.created_at)
 
-    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
-                                     delete=False)
-    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
+    output.guide.append({
+        "type": "text",
+        "title": "Początek",
+        "href": "part1.xhtml"
+    })
+
+    output.add_item(epub.EpubNcx())
+
+    spine = output.spine
 
     functions.reg_mathml_epub(zip)
 
@@ -580,55 +562,70 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
             if filename not in ilustr_elements:
                 continue
             file_path = os.path.join(ilustr_path, filename)
-            zip.write(file_path, os.path.join('OPS', filename))
-            image_id = 'image%s' % i
-            manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" media-type="%s" />' % (
-                    image_id, filename, guess_type(file_path)[0])
-            ))
+            with open(file_path, 'rb') as f:
+                output.add_item(
+                    epub.EpubItem(
+                        uid='image%s' % i,
+                        file_name=filename,
+                        media_type=guess_type(file_path)[0],
+                        content=f.read()
+                    )
+                )
 
     # write static elements
-    mime = zipfile.ZipInfo()
-    mime.filename = 'mimetype'
-    mime.compress_type = zipfile.ZIP_STORED
-    mime.extra = b''
-    zip.writestr(mime, b'application/epub+zip')
-    zip.writestr(
-        'META-INF/container.xml',
-        b'<?xml version="1.0" ?>'
-        b'<container version="1.0" '
-        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
-        b'<rootfiles><rootfile full-path="OPS/content.opf" '
-        b'media-type="application/oebps-package+xml" />'
-        b'</rootfiles></container>'
-    )
-    zip.write(get_resource('res/wl-logo-small.png'),
-              os.path.join('OPS', 'logo_wolnelektury.png'))
-    zip.write(get_resource('res/jedenprocent.png'),
-              os.path.join('OPS', 'jedenprocent.png'))
+
+    with open(get_resource('res/wl-logo-small.png'), 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="logo_wolnelektury.png",
+                file_name="logo_wolnelektury.png",
+                media_type="image/png",
+                content=f.read()
+            )
+        )
+    with open(get_resource('res/jedenprocent.png'), 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="jedenprocent",
+                file_name="jedenprocent.png",
+                media_type="image/png",
+                content=f.read()
+            )
+        )
+
     if not style:
         style = get_resource('epub/style.css')
-    zip.write(style, os.path.join('OPS', 'style.css'))
+    with open(style, 'rb') as f:
+        output.add_item(
+            epub.EpubItem(
+                uid="style",
+                file_name="style.css",
+                media_type="text/css",
+                content=f.read()
+            )
+        )
 
     if cover:
         if cover is True:
             cover = make_cover
 
-        cover_file = BytesIO()
+        cover_file = six.BytesIO()
         bound_cover = cover(document.book_info)
         bound_cover.save(cover_file)
         cover_name = 'cover.%s' % bound_cover.ext()
-        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
-        del cover_file
 
-        cover_tree = etree.parse(get_resource('epub/cover.html'))
-        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
-        zip.writestr('OPS/cover.html', etree.tostring(
-            cover_tree, pretty_print=True, xml_declaration=True,
-            encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-        ))
+        output.set_cover(
+            file_name=cover_name,
+            content=cover_file.getvalue(),
+        )
+        spine.append('cover')
+        output.guide.append({
+            "type": "cover",
+            "href": "cover.xhtml",
+            "title": "Okładka",
+        })
+
+        del cover_file
 
         if bound_cover.uses_dc_cover:
             if document.book_info.cover_by:
@@ -638,97 +635,87 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 document.edoc.getroot().set('data-cover-source',
                                             document.book_info.cover_source)
 
-        manifest.append(etree.fromstring(
-            '<item id="cover" href="cover.html" '
-            'media-type="application/xhtml+xml" />'
-        ))
-        manifest.append(etree.fromstring(
-            '<item id="cover-image" href="%s" media-type="%s" />' % (
-                cover_name, bound_cover.mime_type()
-            )
-        ))
-        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
-        opf.getroot()[0].append(etree.fromstring(
-            '<meta name="cover" content="cover-image"/>'
-        ))
-        guide.append(etree.fromstring(
-            '<reference href="cover.html" type="cover" title="Okładka"/>'
-        ))
-
     annotations = etree.Element('annotations')
 
-    toc_file = etree.fromstring(
-        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
-        b'"-//NISO//DTD ncx 2005-1//EN" '
-        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
-        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
-        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
-        b'</navMap></ncx>'
-    )
-    nav_map = toc_file[-1]
-
-    if html_toc:
-        manifest.append(etree.fromstring(
-            '<item id="html_toc" href="toc.html" '
-            'media-type="application/xhtml+xml" />'
-        ))
-        spine.append(etree.fromstring(
-            '<itemref idref="html_toc" />'))
-        guide.append(etree.fromstring(
-            '<reference href="toc.html" type="toc" title="Spis treści"/>'
-        ))
-
     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
-
-    if len(toc.children) < 2:
-        toc.add(u"Początek utworu", "part1.html")
+    output.toc = toc[0][1]
+
+    if len(toc) < 2:
+        toc.append(
+            epub.Link(
+                "part1.xhtml",
+                "Początek utworu",
+                "part1"
+            )
+        )
 
     # Last modifications in container files and EPUB creation
     if len(annotations) > 0:
-        toc.add("Przypisy", "annotations.html")
-        manifest.append(etree.fromstring(
-            '<item id="annotations" href="annotations.html" '
-            'media-type="application/xhtml+xml" />'
-        ))
-        spine.append(etree.fromstring(
-            '<itemref idref="annotations" />'))
+        toc.append(
+            epub.Link(
+                "annotations.xhtml",
+                "Przypisy",
+                "annotations"
+            )
+        )
         replace_by_verse(annotations)
         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
         chars = chars.union(used_chars(html_tree.getroot()))
-        zip.writestr('OPS/annotations.html', etree.tostring(
-            html_tree, pretty_print=True, xml_declaration=True,
-            encoding="utf-8",
-            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-        ))
 
-    toc.add("Wesprzyj Wolne Lektury", "support.html")
-    manifest.append(etree.fromstring(
-        '<item id="support" href="support.html" '
-        'media-type="application/xhtml+xml" />'
-    ))
-    spine.append(etree.fromstring(
-        '<itemref idref="support" />'))
-    html_string = open(get_resource('epub/support.html'), 'rb').read()
+        item = epub.EpubItem(
+            uid="annotations",
+            file_name="annotations.xhtml",
+            media_type="application/xhtml+xml",
+            content=etree.tostring(
+                html_tree, pretty_print=True, xml_declaration=True,
+                encoding="utf-8",
+                doctype='<!DOCTYPE html>'
+            )
+        )
+        output.add_item(item)
+        spine.append(item)
+
+    toc.append(
+        epub.Link(
+            "support.xhtml",
+            "Wesprzyj Wolne Lektury",
+            "support"
+        )
+    )
+    with open(get_resource('epub/support.xhtml'), 'rb') as f:
+        html_string = f.read()
     chars.update(used_chars(etree.fromstring(html_string)))
-    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
-
-    toc.add("Strona redakcyjna", "last.html")
-    manifest.append(etree.fromstring(
-        '<item id="last" href="last.html" '
-        'media-type="application/xhtml+xml" />'
-    ))
-    spine.append(etree.fromstring(
-        '<itemref idref="last" />'))
+    item = epub.EpubItem(
+        uid="support",
+        file_name="support.xhtml",
+        media_type="application/xhtml+xml",
+        content=squeeze_whitespace(html_string)
+    )
+    output.add_item(item)
+    spine.append(item)
+
+    toc.append(
+        epub.Link(
+            "last.xhtml",
+            "Strona redakcyjna",
+            "last"
+        )
+    )
     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
                      outputtype=output_type)
     chars.update(used_chars(html_tree.getroot()))
-    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
-        html_tree, pretty_print=True, xml_declaration=True,
-        encoding="utf-8",
-        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
-                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
-    )))
+    item = epub.EpubItem(
+        uid="last",
+        file_name="last.xhtml",
+        media_type="application/xhtml+xml",
+        content=squeeze_whitespace(etree.tostring(
+            html_tree, pretty_print=True, xml_declaration=True,
+            encoding="utf-8",
+            doctype='<!DOCTYPE html>'
+        ))
+    )
+    output.add_item(item)
+    spine.append(item)
 
     if not flags or 'without-fonts' not in flags:
         # strip fonts
@@ -754,36 +741,23 @@ def transform(wldoc, verbose=False, style=None, html_toc=False,
                 dev_null = open(os.devnull, 'w')
                 subprocess.check_call(optimizer_call, stdout=dev_null,
                                       stderr=dev_null, env=env)
-            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
-            manifest.append(etree.fromstring(
-                '<item id="%s" href="%s" '
-                'media-type="application/x-font-truetype" />'
-                % (fname, fname)
-            ))
+            with open(os.path.join(tmpdir, fname), 'rb') as f:
+                output.add_item(
+                    epub.EpubItem(
+                        uid=fname,
+                        file_name=fname,
+                        media_type="font/ttf",
+                        content=f.read()
+                    )
+                )
         rmtree(tmpdir)
         if cwd is not None:
             os.chdir(cwd)
-    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
-                 xml_declaration=True, encoding="utf-8"))
-    title = document.book_info.title
-    attributes = ("dtb:uid", "dtb:depth", "dtb:totalPageCount",
-                  "dtb:maxPageNumber")
-    for st in attributes:
-        meta = toc_file.makeelement(NCXNS('meta'))
-        meta.set('name', st)
-        meta.set('content', '0')
-        toc_file[0].append(meta)
-    toc_file[0][0].set('content', str(document.book_info.url))
-    toc_file[0][1].set('content', str(toc.depth()))
-    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
-
-    # write TOC
-    if html_toc:
-        toc.add(u"Spis treści", "toc.html", index=1)
-        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
-    toc.write_to_xml(nav_map)
-    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
-                 xml_declaration=True, encoding="utf-8"))
-    zip.close()
 
+    remove_empty_lists_from_toc(output.toc)
+
+    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
+                                     delete=False)
+    output_file.close()
+    epub.write_epub(output_file.name, output, {'epub3_landmark': False})
     return OutputFile.from_filename(output_file.name)
diff --git a/src/librarian/epub/cover.html b/src/librarian/epub/cover.html
deleted file mode 100644 (file)
index 784067c..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-  <head>
-    <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
-    <title>Okładka</title>
-    <style type="text/css"> img { max-width: 100%; } </style>
-  </head>
-  <body style="oeb-column-number: 1;">
-    <div id="cover-image">
-      <img alt="Okładka" />
-    </div>
-  </body>
-</html>
\ No newline at end of file
diff --git a/src/librarian/epub/emptyChunk.html b/src/librarian/epub/emptyChunk.html
deleted file mode 100644 (file)
index 1452a99..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-    <head>
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
-        <title>WolneLektury.pl</title>
-    </head>
-    <body></body>
-</html>
\ No newline at end of file
diff --git a/src/librarian/epub/emptyChunk.xhtml b/src/librarian/epub/emptyChunk.xhtml
new file mode 100644 (file)
index 0000000..d203c94
--- /dev/null
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+        <title>WolneLektury.pl</title>
+    </head>
+    <body></body>
+</html>
diff --git a/src/librarian/epub/support.html b/src/librarian/epub/support.html
deleted file mode 100755 (executable)
index 9010693..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-  <head>
-    <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
-    <title>Wesprzyj Wolne Lektury</title>
-    <link rel="stylesheet" href="style.css" type="text/css" />
-  </head>
-  <body>
-    <div id="book-text" >
-
-        <h2 class="info">Wesprzyj Wolne Lektury!</h2>
-
-        <p class="info">
-        Wolne Lektury to projekt fundacji Nowoczesna Polska – organizacji
-        pożytku publicznego działającej na rzecz wolności korzystania
-        z&#160;dóbr kultury.</p>
-
-        <p class="info">
-        Co roku do domeny publicznej przechodzi twórczość kolejnych autorów.
-        Dzięki Twojemu wsparciu będziemy je mogli udostępnić wszystkim bezpłatnie.
-        </p>
-
-        <p class="info">
-            <strong>Jak możesz pomóc?</strong>
-        </p>
-
-        <p class="info">
-            <img src="jedenprocent.png" alt="Logo 1%" /><br/>
-            Przekaż 1% podatku na rozwój Wolnych Lektur:<br/>
-            Fundacja Nowoczesna Polska<br/>
-            KRS 0000070056
-        </p>
-
-        <p class="info">
-            Dołącz do <a href="https://wolnelektury.pl/towarzystwo/">Towarzystwa Przyjaciół Wolnych Lektur</a> i pomóż nam rozwijać bibliotekę.
-        </p>
-
-        <p class="info">
-            Przekaż darowiznę na konto:
-            <a href="http://nowoczesnapolska.org.pl/pomoz-nam/wesprzyj-nas/">szczegóły
-            na stronie Fundacji</a>.
-        </p>
-
-    </div>
-  </body>
-</html>
diff --git a/src/librarian/epub/support.xhtml b/src/librarian/epub/support.xhtml
new file mode 100644 (file)
index 0000000..2528ee4
--- /dev/null
@@ -0,0 +1,46 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+    <title>Wesprzyj Wolne Lektury</title>
+    <link rel="stylesheet" href="style.css" type="text/css" />
+  </head>
+  <body>
+    <div id="book-text" >
+
+        <h2 class="info">Wesprzyj Wolne Lektury!</h2>
+
+        <p class="info">
+        Wolne Lektury to projekt fundacji Nowoczesna Polska – organizacji
+        pożytku publicznego działającej na rzecz wolności korzystania
+        z&#160;dóbr kultury.</p>
+
+        <p class="info">
+        Co roku do domeny publicznej przechodzi twórczość kolejnych autorów.
+        Dzięki Twojemu wsparciu będziemy je mogli udostępnić wszystkim bezpłatnie.
+        </p>
+
+        <p class="info">
+            <strong>Jak możesz pomóc?</strong>
+        </p>
+
+        <p class="info">
+            <img src="jedenprocent.png" alt="Logo 1%" /><br/>
+            Przekaż 1% podatku na rozwój Wolnych Lektur:<br/>
+            Fundacja Nowoczesna Polska<br/>
+            KRS 0000070056
+        </p>
+
+        <p class="info">
+            Dołącz do <a href="https://wolnelektury.pl/towarzystwo/">Towarzystwa Przyjaciół Wolnych Lektur</a> i pomóż nam rozwijać bibliotekę.
+        </p>
+
+        <p class="info">
+            Przekaż darowiznę na konto:
+            <a href="http://nowoczesnapolska.org.pl/pomoz-nam/wesprzyj-nas/">szczegóły
+            na stronie Fundacji</a>.
+        </p>
+
+    </div>
+  </body>
+</html>
diff --git a/src/librarian/epub/toc.html b/src/librarian/epub/toc.html
deleted file mode 100755 (executable)
index 3ff7556..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-    <head>
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
-        <title>WolneLektury.pl</title>
-        <link rel="stylesheet" href="style.css" type="text/css" />
-    </head>
-    <body>
-        <div id="book-text">
-            <h1>Spis treści</h1>
-            %s
-        </div>
-    </body>
-</html>
diff --git a/src/librarian/epub/toc.xhtml b/src/librarian/epub/toc.xhtml
new file mode 100644 (file)
index 0000000..21c696f
--- /dev/null
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml">
+    <head>
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+        <title>WolneLektury.pl</title>
+        <link rel="stylesheet" href="style.css" type="text/css" />
+    </head>
+    <body>
+        <div id="book-text">
+            <h1>Spis treści</h1>
+            %s
+        </div>
+    </body>
+</html>
index 5588d3e..cd22462 100644 (file)
@@ -1,14 +1,12 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
   <xsl:output method="html" version="1.0" encoding="utf-8" />
-  <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
-  <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
 
   <xsl:template match="/">
     <html xmlns="http://www.w3.org/1999/xhtml">
       <head>
         <link rel="stylesheet" href="style.css" type="text/css" />
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
         <title>
           <xsl:text>Przypisy</xsl:text>
         </title>
@@ -31,7 +29,7 @@
   </xsl:template>
 
   <xsl:template match="pa|pe|pr|pt" mode="przypis">
-    <p id="annotation-{@number}" class="annotation" xmlns="http://www.w3.org/1999/xhtml"><a href="part{@part}.html#anchor-{@number}" xmlns="http://www.w3.org/1999/xhtml"><xsl:value-of select="@number" /></a>. <xsl:apply-templates /><xsl:if test="name()='pa'"> [przypis autorski]</xsl:if><xsl:if test="name()='pt'"> [przypis tłumacza]</xsl:if><xsl:if test="name()='pr'"> [przypis redakcyjny]</xsl:if><xsl:if test="name()='pe'"> [przypis edytorski]</xsl:if></p>
+    <p id="annotation-{@number}" class="annotation" xmlns="http://www.w3.org/1999/xhtml"><a href="part{@part}.xhtml#anchor-{@number}" xmlns="http://www.w3.org/1999/xhtml"><xsl:value-of select="@number" /></a>. <xsl:apply-templates /><xsl:if test="name()='pa'"> [przypis autorski]</xsl:if><xsl:if test="name()='pt'"> [przypis tłumacza]</xsl:if><xsl:if test="name()='pr'"> [przypis redakcyjny]</xsl:if><xsl:if test="name()='pe'"> [przypis edytorski]</xsl:if></p>
     <xsl:text>&#xa;</xsl:text>
   </xsl:template>
 
index 8c0e09a..fc9018d 100644 (file)
@@ -1,14 +1,12 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/">
   <xsl:output method="html" version="1.0" encoding="utf-8" />
-  <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
-  <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
 
   <xsl:template match="/">
     <html xmlns="http://www.w3.org/1999/xhtml">
       <head>
         <link rel="stylesheet" href="style.css" type="text/css" />
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
         <title>
           <xsl:text>Strona tytułowa</xsl:text>
         </title>
diff --git a/src/librarian/epub/xsltContent.xsl b/src/librarian/epub/xsltContent.xsl
deleted file mode 100644 (file)
index 24315c5..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:wl="http://wolnelektury.pl/functions">
-  <xsl:output method="html" version="1.0" omit-xml-declaration="no" />
-
-  <xsl:template match="/">
-    <package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookId" version="2.0">
-      <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf">
-        <xsl:apply-templates select="//dc:title" />
-        <dc:language>
-          <xsl:apply-templates select="//dc:language" mode="language" />
-        </dc:language>
-        <dc:identifier id="BookId" opf:scheme="URI">
-          <xsl:apply-templates select="//dc:identifier.url" />
-        </dc:identifier>
-        <dc:subject>
-          <xsl:apply-templates select="//dc:identifier.url" />
-        </dc:subject>
-        <dc:creator opf:role="aut">
-          <xsl:attribute name="opf:file-as">
-            <xsl:value-of select="//dc:creator" />
-          </xsl:attribute>
-          <xsl:for-each select="//dc:creator/text()">
-              <xsl:value-of select="wl:person_name(.)"/>
-              <xsl:if test="not(position() = last())">, </xsl:if>
-          </xsl:for-each>
-        </dc:creator>
-        <dc:publisher>
-          <xsl:for-each select="//dc:publisher/text()">
-            <xsl:value-of select="."/>
-            <xsl:if test="not(position() = last())">; </xsl:if>
-          </xsl:for-each>
-        </dc:publisher>
-        <dc:date opf:event="publication">
-          <xsl:apply-templates select="//dc:date" />
-        </dc:date>
-      </metadata>
-      <manifest>
-        <item id="toc" href="toc.ncx" media-type="application/x-dtbncx+xml" />
-        <item id="style" href="style.css" media-type="text/css" />
-        <item id="titlePage" href="title.html" media-type="application/xhtml+xml" />
-        <item id="logo_wolnelektury" href="logo_wolnelektury.png" media-type="image/png" />
-        <item id="jedenprocent" href="jedenprocent.png" media-type="image/png" />
-      </manifest>
-      <spine toc="toc">
-        <itemref idref="titlePage" />
-      </spine>
-      <guide>
-        <reference type="text" title="Początek" href="part1.html" />
-      </guide>
-    </package>
-  </xsl:template>
-
-  <xsl:template match="dc:title" >
-    <dc:title>
-      <xsl:value-of select="." />
-    </dc:title>
-  </xsl:template>
-  
-  <xsl:template match="text()" mode="person">
-    <xsl:value-of select="wl:person_name(.)" />
-  </xsl:template>
-  
-  <xsl:template match="text()" mode="language">
-    <xsl:value-of select="wl:lang_code_3to2(.)" />
-  </xsl:template>
-
-</xsl:stylesheet>
index e29c5f4..3bd5c4c 100644 (file)
@@ -6,15 +6,13 @@
     xmlns:wl="http://wolnelektury.pl/functions"
     xmlns:date="http://exslt.org/dates-and-times">
   <xsl:output method="html" version="1.0" encoding="utf-8" />
-  <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
-  <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
   <xsl:param name="outputtype"/>
 
   <xsl:template match="utwor">
     <html>
       <head>
         <link rel="stylesheet" href="style.css" type="text/css" />
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
         <title>
           <xsl:text>Strona redakcyjna</xsl:text>
         </title>
index 724d983..93767cf 100644 (file)
@@ -1,14 +1,12 @@
 <?xml version="1.0" encoding="utf-8"?>
 <xsl:stylesheet version="1.0"  xmlns="http://www.w3.org/1999/xhtml" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:wl="http://wolnelektury.pl/functions" exclude-result-prefixes="mml wl">
   <xsl:output method="html" version="1.0" encoding="utf-8" />
-  <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
-  <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
 
   <xsl:template match="/" >
     <xsl:element name="html" xmlns="http://www.w3.org/1999/xhtml">
       <xsl:element name="head">
         <link rel="stylesheet" href="style.css" type="text/css" />
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
         <title>
           WolneLektury.pl
         </title>
   <xsl:template match="extra" />
 
   <xsl:template match="pe|pa|pr|pt" >
-    <a class="anchor" id="anchor-{.}" href="annotations.html#annotation-{.}"
+    <a class="anchor" id="anchor-{.}" href="annotations.xhtml#annotation-{.}"
        xmlns="http://www.w3.org/1999/xhtml"><sup xmlns="http://www.w3.org/1999/xhtml"><xsl:apply-templates /></sup></a>
   </xsl:template>
 
index 0a7279b..c19a6ca 100644 (file)
@@ -5,15 +5,13 @@
     xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:wl="http://wolnelektury.pl/functions">
   <xsl:output method="html" version="1.0" encoding="utf-8" />
-  <xsl:output doctype-system="http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" />
-  <xsl:output doctype-public="-//W3C//DTD XHTML 1.1//EN" />
   <xsl:param name="outputtype"/>
 
   <xsl:template match="/">
     <html>
       <head>
         <link rel="stylesheet" href="style.css" type="text/css" />
-        <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8" />
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
         <title>
           <xsl:text>Strona tytułowa</xsl:text>
         </title>
index 00f1f6e..15e931c 100644 (file)
@@ -111,21 +111,19 @@ def reg_texcommand():
     _register_function(texcommand)
 
 
-def reg_lang_code_3to2():
-    def lang_code_3to2(context, text):
-        """Convert 3-letter language code to 2-letter code"""
-        result = ''
-        text = ''.join(text)
-        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
-            for line in f.read().decode('latin1').split('\n'):
-                list = line.strip().split('|')
-                if list[0] == text:
-                    result = list[2]
-        if result == '':
-            return text
-        else:
-            return result
-    _register_function(lang_code_3to2)
+def lang_code_3to2(text):
+    """Convert a 3-letter language code to its 2-letter code.
+
+    Returns the (joined) input unchanged when no 2-letter code is found."""
+    text = ''.join(text)  # text may be a str or a sequence of strings
+    result = ''
+    with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
+        for line in f.read().decode('latin1').split('\n'):
+            codes = line.strip().split('|')
+            # Blank or short lines have no third field; skip, don't crash.
+            if len(codes) > 2 and codes[0] == text:
+                result = codes[2]
+    return result or text
 
 
 def mathml_latex(context, trees):
index a4eef5c..337db75 100644 (file)
@@ -30,7 +30,7 @@ def transform(wldoc, verbose=False, sample=None, cover=None,
     del wldoc
 
     epub = document.as_epub(verbose=verbose, sample=sample,
-                            html_toc=True, cover=cover or True, flags=flags,
+                            cover=cover or True, flags=flags,
                             hyphenate=hyphenate, ilustr_path=ilustr_path,
                             output_type='mobi')
     if verbose:
index 4ac874a..9ff1b41 100644 (file)
@@ -5,7 +5,9 @@
 #
 from __future__ import unicode_literals
 
+import subprocess
 from zipfile import ZipFile
+from ebooklib import epub
 from lxml import html
 from nose.tools import *
 from librarian import DirDocProvider
@@ -14,14 +16,14 @@ from tests.utils import get_fixture
 
 
 def test_transform():
-    epub = WLDocument.from_file(
+    epub_file = WLDocument.from_file(
             get_fixture('text', 'asnyk_zbior.xml'),
             provider=DirDocProvider(get_fixture('text', ''))
-        ).as_epub(flags=['without_fonts']).get_file()
-    zipf = ZipFile(epub)
+        ).as_epub(cover=True, flags=['without_fonts'])
+    zipf = ZipFile(epub_file.get_file())
 
     # Check contributor list.
-    last = zipf.open('OPS/last.html')
+    last = zipf.open('EPUB/last.xhtml')
     tree = html.parse(last)
     editors_attribution = False
     for par in tree.findall("//p"):
@@ -33,6 +35,60 @@ def test_transform():
                 u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.')
     assert_true(editors_attribution)
 
+    # Check that we have a valid EPUB.
+    assert_equal(
+        subprocess.call([
+            'epubcheck', '-quiet', epub_file.get_filename()
+        ]),
+        0
+    )
+
+    book = epub.read_epub(epub_file.get_filename())
+
+    # Check that guide items are there.
+    assert_equals(
+        book.guide,
+        [
+            {'href': 'part1.xhtml', 'title': 'Początek', 'type': 'text'},
+            {'href': 'cover.xhtml', 'title': 'Okładka', 'type': 'cover'}
+        ]
+    )
+
+    # Check that metadata is there.
+    DC = "http://purl.org/dc/elements/1.1/"
+    OPF = "http://www.idpf.org/2007/opf"
+
+    assert_equals(
+        book.get_metadata(OPF, "cover"),
+        [(None, {'name': 'cover', 'content': 'cover-img'})]
+    )
+    assert_equals(
+        book.get_metadata(DC, "title"),
+        [('Poezye', {})]
+    )
+    assert_equals(
+        book.get_metadata(DC, "language"),
+        [('pl', {})]
+    )
+    assert_equals(
+        book.get_metadata(DC, "identifier"),
+        [('http://wolnelektury.pl/katalog/lektura/poezye', {
+            'id': 'id',
+        })]
+    )
+    assert_equals(
+        book.get_metadata(DC, "creator"),
+        [('Adam Asnyk', {"id": "creator"})]
+    )
+    assert_equals(
+        book.get_metadata(DC, "publisher"),
+        [('Fundacja Nowoczesna Polska', {})]
+    )
+    assert_equals(
+        book.get_metadata(DC, "date"),
+        [("2007-09-06", {})]
+    )
+
 
 def test_transform_hyphenate():
     epub = WLDocument.from_file(