From ca8319931f449468918067367133ff25f9b19f30 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Fri, 19 Jun 2020 17:02:51 +0200 Subject: [PATCH] EPUB3 support. --- scripts/book2epub | 5 - setup.py | 1 + src/librarian/book2anything.py | 11 +- src/librarian/epub.py | 558 +++++++++--------- src/librarian/epub/cover.html | 13 - src/librarian/epub/emptyChunk.html | 8 - src/librarian/epub/emptyChunk.xhtml | 8 + .../epub/{support.html => support.xhtml} | 4 +- src/librarian/epub/{toc.html => toc.xhtml} | 4 +- src/librarian/epub/xsltAnnotations.xsl | 6 +- src/librarian/epub/xsltChunkTitle.xsl | 4 +- src/librarian/epub/xsltContent.xsl | 67 --- src/librarian/epub/xsltLast.xsl | 4 +- src/librarian/epub/xsltScheme.xsl | 6 +- src/librarian/epub/xsltTitle.xsl | 4 +- src/librarian/functions.py | 28 +- src/librarian/mobi.py | 2 +- tests/test_epub.py | 64 +- 18 files changed, 369 insertions(+), 428 deletions(-) delete mode 100644 src/librarian/epub/cover.html delete mode 100644 src/librarian/epub/emptyChunk.html create mode 100644 src/librarian/epub/emptyChunk.xhtml rename src/librarian/epub/{support.html => support.xhtml} (88%) mode change 100755 => 100644 rename src/librarian/epub/{toc.html => toc.xhtml} (61%) mode change 100755 => 100644 delete mode 100644 src/librarian/epub/xsltContent.xsl diff --git a/scripts/book2epub b/scripts/book2epub index 5b906b9..c1027c5 100755 --- a/scripts/book2epub +++ b/scripts/book2epub @@ -19,11 +19,6 @@ class Book2Epub(Book2Anything): action='store_true', default=False, help='mark the output as a working copy') ] - transform_options = [ - Option('-t', '--html-toc', - action='store_true', dest='html_toc', default=False, - help='with inline html toc') - ] if __name__ == '__main__': diff --git a/setup.py b/setup.py index 53cc182..0466e08 100755 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ setup( 'Pillow', 'six', 'texml', + 'ebooklib', ], scripts=['scripts/book2html', 'scripts/book2txt', diff --git a/src/librarian/book2anything.py b/src/librarian/book2anything.py index d954ce6..a1d5687 100755 --- a/src/librarian/book2anything.py +++ b/src/librarian/book2anything.py @@ -11,7 +11,7 @@ import optparse import six from librarian import DirDocProvider, ParseError from librarian.parser import WLDocument -from librarian.cover import make_cover +from librarian.cover import make_cover, COVER_CLASSES class Option(object): @@ -82,6 +82,10 @@ class Book2Anything(object): help='prefix for image download cache' + (' (implies --with-cover)' if cls.cover_optional else '') ) + parser.add_option( + '--cover-class', dest='cover_class', + help='cover class name' + ) for option in ( cls.parser_options + cls.transform_options @@ -118,11 +122,14 @@ class Book2Anything(object): def cover_class(book_info, *args, **kwargs): return make_cover( book_info, image_cache=options.image_cache, + cover_class=options.cover_class, *args, **kwargs ) transform_args['cover'] = cover_class elif not cls.cover_optional or options.with_cover: - transform_args['cover'] = make_cover + cover_class = COVER_CLASSES.get( + options.cover_class, make_cover) + transform_args['cover'] = cover_class # Do some real work try: diff --git a/src/librarian/epub.py b/src/librarian/epub.py index 137796e..a8c6680 100644 --- a/src/librarian/epub.py +++ b/src/librarian/epub.py @@ -9,16 +9,16 @@ import os import os.path import re import subprocess -from six import BytesIO +import six from copy import deepcopy from mimetypes import guess_type +from ebooklib import epub from lxml import etree -import zipfile from tempfile import mkdtemp, NamedTemporaryFile from shutil import rmtree -from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile +from librarian import RDFNS, WLNS, DCNS, OutputFile from librarian.cover import make_cover from librarian import functions, get_resource @@ -26,7 +26,6 @@ from librarian import functions, get_resource from librarian.hyphenator import Hyphenator functions.reg_person_name() -functions.reg_lang_code_3to2() def squeeze_whitespace(s): @@ -34,21 +33,9 @@ def squeeze_whitespace(s): def set_hyph_language(source_tree): - def get_short_lng_code(text): - result = '' - text = ''.join(text) - with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f: - for line in f.read().decode('latin1').split('\n'): - list = line.strip().split('|') - if list[0] == text: - result = list[2] - if result == '': - return text - else: - return result bibl_lng = etree.XPath('//dc:language//text()', namespaces={'dc': str(DCNS)})(source_tree) - short_lng = get_short_lng_code(bibl_lng[0]) + short_lng = functions.lang_code_3to2(bibl_lng[0]) try: return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' + short_lng + '.dic')) @@ -249,104 +236,6 @@ def replace_by_verse(tree): Stanza(stanza).versify() -def add_to_manifest(manifest, partno): - """ Adds a node to the manifest section in content.opf file """ - - partstr = 'part%d' % partno - e = manifest.makeelement( - OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html', - 'media-type': 'application/xhtml+xml'} - ) - manifest.append(e) - - -def add_to_spine(spine, partno): - """ Adds a node to the spine section in content.opf file """ - - e = spine.makeelement( - OPFNS('itemref'), - attrib={'idref': 'part%d' % partno} - ) - spine.append(e) - - -class TOC(object): - def __init__(self, name=None, part_href=None): - self.children = [] - self.name = name - self.part_href = part_href - self.sub_number = None - - def add(self, name, part_href, level=0, is_part=True, index=None): - assert level == 0 or index is None - if level > 0 and self.children: - return self.children[-1].add(name, part_href, level - 1, is_part) - else: - t = TOC(name) - t.part_href = part_href - if index is not None: - self.children.insert(index, t) - else: - self.children.append(t) - if not is_part: - t.sub_number = len(self.children) + 1 - return t.sub_number - - def append(self, toc): - self.children.append(toc) - - def extend(self, toc): - self.children.extend(toc.children) - - def depth(self): - if self.children: - return max((c.depth() for c in self.children)) + 1 - else: - return 0 - - def href(self): - src = self.part_href - if self.sub_number is not None: - src += '#sub%d' % self.sub_number - return src - - def write_to_xml(self, nav_map, counter=1): - for child in self.children: - nav_point = nav_map.makeelement(NCXNS('navPoint')) - nav_point.set('id', 'NavPoint-%d' % counter) - nav_point.set('playOrder', str(counter)) - - nav_label = nav_map.makeelement(NCXNS('navLabel')) - text = nav_map.makeelement(NCXNS('text')) - if child.name is not None: - text.text = re.sub(r'\n', ' ', child.name) - else: - text.text = child.name - nav_label.append(text) - nav_point.append(nav_label) - - content = nav_map.makeelement(NCXNS('content')) - content.set('src', child.href()) - nav_point.append(content) - nav_map.append(nav_point) - counter = child.write_to_xml(nav_point, counter + 1) - return counter - - def html_part(self, depth=0): - texts = [] - for child in self.children: - texts.append( - "
%s
" % - (depth, child.href(), child.name)) - texts.append(child.html_part(depth + 1)) - return "\n".join(texts) - - def html(self): - with open(get_resource('epub/toc.html'), 'rb') as f: - t = f.read().decode('utf-8') - return t % self.html_part() - - def used_chars(element): """ Lists characters used in an ETree Element """ chars = set((element.text or '') + (element.tail or '')) @@ -413,19 +302,43 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, and a set of used characters. """ - toc = TOC() + toc = [] for element in chunk_xml[0]: if element.tag == "naglowek_czesc": - toc.add(node_name(element), "part%d.html#book-text" % chunk_no) + toc.append( + ( + epub.Link( + "part%d.xhtml#book-text" % chunk_no, + node_name(element), + "part%d-text" % chunk_no + ), + [] + ) + ) elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): - toc.add(node_name(element), "part%d.html" % chunk_no) + toc.append( + ( + epub.Link( + "part%d.xhtml" % chunk_no, + node_name(element), + "part%d" % chunk_no + ), + [] + ) + ) elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): - subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, - level=1, is_part=False) - element.set('sub', str(subnumber)) + subnumber = len(toc[-1][1]) + toc[-1][1].append( + epub.Link( + "part%d.xhtml#sub%d" % (chunk_no, subnumber), + node_name(element), + "part%d-sub%d" % (chunk_no, subnumber) + ) + ) + element.set('sub', six.text_type(subnumber)) if empty: if not _empty_html_static: - with open(get_resource('epub/emptyChunk.html')) as f: + with open(get_resource('epub/emptyChunk.xhtml')) as f: _empty_html_static.append(f.read()) chars = set() output_html = _empty_html_static[0] @@ -437,13 +350,21 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, output_html = etree.tostring( html_tree, pretty_print=True, xml_declaration=True, encoding="utf-8", - doctype='' + doctype='' ) return output_html, toc, chars -def transform(wldoc, verbose=False, style=None, html_toc=False, +def remove_empty_lists_from_toc(toc): + for i, e in enumerate(toc): + if isinstance(e, tuple): + if e[1]: + remove_empty_lists_from_toc(e[1]) + else: + toc[i] = e[0] + + +def transform(wldoc, verbose=False, style=None, sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'): """ produces a EPUB file @@ -465,7 +386,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, # every input file will have a TOC entry, # pointing to starting chunk - toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter) + toc = [ + ( + epub.Link( + "part%d.xhtml" % chunk_counter, + wldoc.book_info.title, + "path%d-start" % chunk_counter + ), + [] + ) + ] chars = set() if first: # write book title page @@ -475,17 +405,42 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, html_string = etree.tostring( html_tree, pretty_print=True, xml_declaration=True, encoding="utf-8", - doctype='' + doctype='' + ) + item = epub.EpubItem( + uid="titlePage", + file_name="title.xhtml", + media_type="application/xhtml+xml", + content=squeeze_whitespace(html_string) ) - zip.writestr('OPS/title.html', squeeze_whitespace(html_string)) + spine.append(item) + output.add_item(item) # add a title page TOC entry - toc.add(u"Strona tytułowa", "title.html") + toc[-1][1].append( + epub.Link( + "title.xhtml", + "Strona tytułowa", + "title", + ) + ) + + item = epub.EpubNav() + toc[-1][1].append( + epub.Link( + "nav.xhtml", + "Spis treści", + "nav" + ) + ) + output.add_item(item) + spine.append(item) + elif wldoc.book_info.parts: # write title page for every parent if sample is not None and sample <= 0: chars = set() - html_string = open(get_resource('epub/emptyChunk.html')).read() + html_string = open( + get_resource('epub/emptyChunk.xhtml')).read() else: html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl')) @@ -493,13 +448,17 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, html_string = etree.tostring( html_tree, pretty_print=True, xml_declaration=True, encoding="utf-8", - doctype='' + doctype='' ) - zip.writestr('OPS/part%d.html' % chunk_counter, - squeeze_whitespace(html_string)) - add_to_manifest(manifest, chunk_counter) - add_to_spine(spine, chunk_counter) + item = epub.EpubItem( + uid="part%d" % chunk_counter, + file_name="part%d.xhtml" % chunk_counter, + media_type="application/xhtml+xml", + content=squeeze_whitespace(html_string) + ) + output.add_item(item) + spine.append(item) + chunk_counter += 1 if len(wldoc.edoc.getroot()) > 1: @@ -524,18 +483,22 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, chunk_html, chunk_toc, chunk_chars = transform_chunk( chunk_xml, chunk_counter, annotations, empty) - toc.extend(chunk_toc) + toc[-1][1].extend(chunk_toc) chars = chars.union(chunk_chars) - zip.writestr('OPS/part%d.html' % chunk_counter, - squeeze_whitespace(chunk_html)) - add_to_manifest(manifest, chunk_counter) - add_to_spine(spine, chunk_counter) + item = epub.EpubItem( + uid="part%d" % chunk_counter, + file_name="part%d.xhtml" % chunk_counter, + media_type="application/xhtml+xml", + content=squeeze_whitespace(chunk_html) + ) + output.add_item(item) + spine.append(item) chunk_counter += 1 for child in wldoc.parts(): child_toc, chunk_counter, chunk_chars, sample = transform_file( child, chunk_counter, first=False, sample=sample) - toc.append(child_toc) + toc[-1][1].extend(child_toc) chars = chars.union(chunk_chars) return toc, chunk_counter, chars, sample @@ -561,15 +524,34 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if document.book_info.thanks: document.edoc.getroot().set('thanks', document.book_info.thanks) - opf = xslt(document.book_info.to_etree(), - get_resource('epub/xsltContent.xsl')) - manifest = opf.find('.//' + OPFNS('manifest')) - guide = opf.find('.//' + OPFNS('guide')) - spine = opf.find('.//' + OPFNS('spine')) + output = epub.EpubBook() + output.set_identifier(six.text_type(document.book_info.url)) + output.set_language(functions.lang_code_3to2(document.book_info.language)) + output.set_title(document.book_info.title) + for author in document.book_info.authors: + output.add_author( + author.readable(), + file_as=six.text_type(author) + ) + for translator in document.book_info.translators: + output.add_author( + translator.readable(), + file_as=six.text_type(translator), + role='translator' + ) + for publisher in document.book_info.publisher: + output.add_metadata("DC", "publisher", publisher) + output.add_metadata("DC", "date", document.book_info.created_at) - output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', - delete=False) - zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) + output.guide.append({ + "type": "text", + "title": "Początek", + "href": "part1.xhtml" + }) + + output.add_item(epub.EpubNcx()) + + spine = output.spine functions.reg_mathml_epub(zip) @@ -580,55 +562,70 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if filename not in ilustr_elements: continue file_path = os.path.join(ilustr_path, filename) - zip.write(file_path, os.path.join('OPS', filename)) - image_id = 'image%s' % i - manifest.append(etree.fromstring( - '' % ( - image_id, filename, guess_type(file_path)[0]) - )) + with open(file_path, 'rb') as f: + output.add_item( + epub.EpubItem( + uid='image%s' % i, + file_name=filename, + media_type=guess_type(file_path)[0], + content=f.read() + ) + ) # write static elements - mime = zipfile.ZipInfo() - mime.filename = 'mimetype' - mime.compress_type = zipfile.ZIP_STORED - mime.extra = b'' - zip.writestr(mime, b'application/epub+zip') - zip.writestr( - 'META-INF/container.xml', - b'' - b'' - b'' - b'' - ) - zip.write(get_resource('res/wl-logo-small.png'), - os.path.join('OPS', 'logo_wolnelektury.png')) - zip.write(get_resource('res/jedenprocent.png'), - os.path.join('OPS', 'jedenprocent.png')) + + with open(get_resource('res/wl-logo-small.png'), 'rb') as f: + output.add_item( + epub.EpubItem( + uid="logo_wolnelektury.png", + file_name="logo_wolnelektury.png", + media_type="image/png", + content=f.read() + ) + ) + with open(get_resource('res/jedenprocent.png'), 'rb') as f: + output.add_item( + epub.EpubItem( + uid="jedenprocent", + file_name="jedenprocent.png", + media_type="image/png", + content=f.read() + ) + ) + if not style: style = get_resource('epub/style.css') - zip.write(style, os.path.join('OPS', 'style.css')) + with open(style, 'rb') as f: + output.add_item( + epub.EpubItem( + uid="style", + file_name="style.css", + media_type="text/css", + content=f.read() + ) + ) if cover: if cover is True: cover = make_cover - cover_file = BytesIO() + cover_file = six.BytesIO() bound_cover = cover(document.book_info) bound_cover.save(cover_file) cover_name = 'cover.%s' % bound_cover.ext() - zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue()) - del cover_file - cover_tree = etree.parse(get_resource('epub/cover.html')) - cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name) - zip.writestr('OPS/cover.html', etree.tostring( - cover_tree, pretty_print=True, xml_declaration=True, - encoding="utf-8", - doctype='' - )) + output.set_cover( + file_name=cover_name, + content=cover_file.getvalue(), + ) + spine.append('cover') + output.guide.append({ + "type": "cover", + "href": "cover.xhtml", + "title": "Okładka", + }) + + del cover_file if bound_cover.uses_dc_cover: if document.book_info.cover_by: @@ -638,97 +635,87 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) - manifest.append(etree.fromstring( - '' - )) - manifest.append(etree.fromstring( - '' % ( - cover_name, bound_cover.mime_type() - ) - )) - spine.insert(0, etree.fromstring('')) - opf.getroot()[0].append(etree.fromstring( - '' - )) - guide.append(etree.fromstring( - '' - )) - annotations = etree.Element('annotations') - toc_file = etree.fromstring( - b'' - b'' - b'' - ) - nav_map = toc_file[-1] - - if html_toc: - manifest.append(etree.fromstring( - '' - )) - spine.append(etree.fromstring( - '')) - guide.append(etree.fromstring( - '' - )) - toc, chunk_counter, chars, sample = transform_file(document, sample=sample) - - if len(toc.children) < 2: - toc.add(u"Początek utworu", "part1.html") + output.toc = toc[0][1] + + if len(toc) < 2: + toc.append( + epub.Link( + "part1.xhtml", + "Początek utworu", + "part1" + ) + ) # Last modifications in container files and EPUB creation if len(annotations) > 0: - toc.add("Przypisy", "annotations.html") - manifest.append(etree.fromstring( - '' - )) - spine.append(etree.fromstring( - '')) + toc.append( + epub.Link( + "annotations.xhtml", + "Przypisy", + "annotations" + ) + ) replace_by_verse(annotations) html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl')) chars = chars.union(used_chars(html_tree.getroot())) - zip.writestr('OPS/annotations.html', etree.tostring( - html_tree, pretty_print=True, xml_declaration=True, - encoding="utf-8", - doctype='' - )) - toc.add("Wesprzyj Wolne Lektury", "support.html") - manifest.append(etree.fromstring( - '' - )) - spine.append(etree.fromstring( - '')) - html_string = open(get_resource('epub/support.html'), 'rb').read() + item = epub.EpubItem( + uid="annotations", + file_name="annotations.xhtml", + media_type="application/xhtml+xml", + content=etree.tostring( + html_tree, pretty_print=True, xml_declaration=True, + encoding="utf-8", + doctype='' + ) + ) + output.add_item(item) + spine.append(item) + + toc.append( + epub.Link( + "support.xhtml", + "Wesprzyj Wolne Lektury", + "support" + ) + ) + with open(get_resource('epub/support.xhtml'), 'rb') as f: + html_string = f.read() chars.update(used_chars(etree.fromstring(html_string))) - zip.writestr('OPS/support.html', squeeze_whitespace(html_string)) - - toc.add("Strona redakcyjna", "last.html") - manifest.append(etree.fromstring( - '' - )) - spine.append(etree.fromstring( - '')) + item = epub.EpubItem( + uid="support", + file_name="support.xhtml", + media_type="application/xhtml+xml", + content=squeeze_whitespace(html_string) + ) + output.add_item(item) + spine.append(item) + + toc.append( + epub.Link( + "last.xhtml", + "Strona redakcyjna", + "last" + ) + ) html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type) chars.update(used_chars(html_tree.getroot())) - zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring( - html_tree, pretty_print=True, xml_declaration=True, - encoding="utf-8", - doctype='' - ))) + item = epub.EpubItem( + uid="last", + file_name="last.xhtml", + media_type="application/xhtml+xml", + content=squeeze_whitespace(etree.tostring( + html_tree, pretty_print=True, xml_declaration=True, + encoding="utf-8", + doctype='' + )) + ) + output.add_item(item) + spine.append(item) if not flags or 'without-fonts' not in flags: # strip fonts @@ -754,36 +741,23 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, dev_null = open(os.devnull, 'w') subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env) - zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) - manifest.append(etree.fromstring( - '' - % (fname, fname) - )) + with open(os.path.join(tmpdir, fname), 'rb') as f: + output.add_item( + epub.EpubItem( + uid=fname, + file_name=fname, + media_type="font/ttf", + content=f.read() + ) + ) rmtree(tmpdir) if cwd is not None: os.chdir(cwd) - zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True, - xml_declaration=True, encoding="utf-8")) - title = document.book_info.title - attributes = ("dtb:uid", "dtb:depth", "dtb:totalPageCount", - "dtb:maxPageNumber") - for st in attributes: - meta = toc_file.makeelement(NCXNS('meta')) - meta.set('name', st) - meta.set('content', '0') - toc_file[0].append(meta) - toc_file[0][0].set('content', str(document.book_info.url)) - toc_file[0][1].set('content', str(toc.depth())) - set_inner_xml(toc_file[1], ''.join(('', title, ''))) - - # write TOC - if html_toc: - toc.add(u"Spis treści", "toc.html", index=1) - zip.writestr('OPS/toc.html', toc.html().encode('utf-8')) - toc.write_to_xml(nav_map) - zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True, - xml_declaration=True, encoding="utf-8")) - zip.close() + remove_empty_lists_from_toc(output.toc) + + output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', + delete=False) + output_file.close() + epub.write_epub(output_file.name, output, {'epub3_landmark': False}) return OutputFile.from_filename(output_file.name) diff --git a/src/librarian/epub/cover.html b/src/librarian/epub/cover.html deleted file mode 100644 index 784067c..0000000 --- a/src/librarian/epub/cover.html +++ /dev/null @@ -1,13 +0,0 @@ - - - - - Okładka - - - -
- Okładka -
- - \ No newline at end of file diff --git a/src/librarian/epub/emptyChunk.html b/src/librarian/epub/emptyChunk.html deleted file mode 100644 index 1452a99..0000000 --- a/src/librarian/epub/emptyChunk.html +++ /dev/null @@ -1,8 +0,0 @@ - - - - - WolneLektury.pl - - - \ No newline at end of file diff --git a/src/librarian/epub/emptyChunk.xhtml b/src/librarian/epub/emptyChunk.xhtml new file mode 100644 index 0000000..d203c94 --- /dev/null +++ b/src/librarian/epub/emptyChunk.xhtml @@ -0,0 +1,8 @@ + + + + + WolneLektury.pl + + + diff --git a/src/librarian/epub/support.html b/src/librarian/epub/support.xhtml old mode 100755 new mode 100644 similarity index 88% rename from src/librarian/epub/support.html rename to src/librarian/epub/support.xhtml index 9010693..2528ee4 --- a/src/librarian/epub/support.html +++ b/src/librarian/epub/support.xhtml @@ -1,7 +1,7 @@ - + - + Wesprzyj Wolne Lektury diff --git a/src/librarian/epub/toc.html b/src/librarian/epub/toc.xhtml old mode 100755 new mode 100644 similarity index 61% rename from src/librarian/epub/toc.html rename to src/librarian/epub/toc.xhtml index 3ff7556..21c696f --- a/src/librarian/epub/toc.html +++ b/src/librarian/epub/toc.xhtml @@ -1,7 +1,7 @@ - + - + WolneLektury.pl diff --git a/src/librarian/epub/xsltAnnotations.xsl b/src/librarian/epub/xsltAnnotations.xsl index 5588d3e..cd22462 100644 --- a/src/librarian/epub/xsltAnnotations.xsl +++ b/src/librarian/epub/xsltAnnotations.xsl @@ -1,14 +1,12 @@ - - - + <xsl:text>Przypisy</xsl:text> @@ -31,7 +29,7 @@ -

. [przypis autorski] [przypis tłumacza] [przypis redakcyjny] [przypis edytorski]

+

. [przypis autorski] [przypis tłumacza] [przypis redakcyjny] [przypis edytorski]

diff --git a/src/librarian/epub/xsltChunkTitle.xsl b/src/librarian/epub/xsltChunkTitle.xsl index 8c0e09a..fc9018d 100644 --- a/src/librarian/epub/xsltChunkTitle.xsl +++ b/src/librarian/epub/xsltChunkTitle.xsl @@ -1,14 +1,12 @@ - - - + <xsl:text>Strona tytułowa</xsl:text> diff --git a/src/librarian/epub/xsltContent.xsl b/src/librarian/epub/xsltContent.xsl deleted file mode 100644 index 24315c5..0000000 --- a/src/librarian/epub/xsltContent.xsl +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - , - - - - - - ; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/librarian/epub/xsltLast.xsl b/src/librarian/epub/xsltLast.xsl index e29c5f4..3bd5c4c 100644 --- a/src/librarian/epub/xsltLast.xsl +++ b/src/librarian/epub/xsltLast.xsl @@ -6,15 +6,13 @@ xmlns:wl="http://wolnelektury.pl/functions" xmlns:date="http://exslt.org/dates-and-times"> - - - + <xsl:text>Strona redakcyjna</xsl:text> diff --git a/src/librarian/epub/xsltScheme.xsl b/src/librarian/epub/xsltScheme.xsl index 724d983..93767cf 100644 --- a/src/librarian/epub/xsltScheme.xsl +++ b/src/librarian/epub/xsltScheme.xsl @@ -1,14 +1,12 @@ - - - + WolneLektury.pl @@ -378,7 +376,7 @@ - diff --git a/src/librarian/epub/xsltTitle.xsl b/src/librarian/epub/xsltTitle.xsl index 0a7279b..c19a6ca 100644 --- a/src/librarian/epub/xsltTitle.xsl +++ b/src/librarian/epub/xsltTitle.xsl @@ -5,15 +5,13 @@ xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:wl="http://wolnelektury.pl/functions"> - - - + <xsl:text>Strona tytułowa</xsl:text> diff --git a/src/librarian/functions.py b/src/librarian/functions.py index 00f1f6e..15e931c 100644 --- a/src/librarian/functions.py +++ b/src/librarian/functions.py @@ -111,21 +111,19 @@ def reg_texcommand(): _register_function(texcommand) -def reg_lang_code_3to2(): - def lang_code_3to2(context, text): - """Convert 3-letter language code to 2-letter code""" - result = '' - text = ''.join(text) - with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f: - for line in f.read().decode('latin1').split('\n'): - list = line.strip().split('|') - if list[0] == text: - result = list[2] - if result == '': - return text - else: - return result - _register_function(lang_code_3to2) +def lang_code_3to2(text): + """Convert 3-letter language code to 2-letter code""" + result = '' + text = ''.join(text) + with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f: + for line in f.read().decode('latin1').split('\n'): + codes = line.strip().split('|') + if codes[0] == text: + result = codes[2] + if result == '': + return text + else: + return result def mathml_latex(context, trees): diff --git a/src/librarian/mobi.py b/src/librarian/mobi.py index a4eef5c..337db75 100644 --- a/src/librarian/mobi.py +++ b/src/librarian/mobi.py @@ -30,7 +30,7 @@ def transform(wldoc, verbose=False, sample=None, cover=None, del wldoc epub = document.as_epub(verbose=verbose, sample=sample, - html_toc=True, cover=cover or True, flags=flags, + cover=cover or True, flags=flags, hyphenate=hyphenate, ilustr_path=ilustr_path, output_type='mobi') if verbose: diff --git a/tests/test_epub.py b/tests/test_epub.py index 4ac874a..9ff1b41 100644 --- a/tests/test_epub.py +++ b/tests/test_epub.py @@ -5,7 +5,9 @@ # from __future__ import unicode_literals +import subprocess from zipfile import ZipFile +from ebooklib import epub from lxml import html from nose.tools import * from librarian import DirDocProvider @@ -14,14 +16,14 @@ from tests.utils import get_fixture def test_transform(): - epub = WLDocument.from_file( + epub_file = WLDocument.from_file( get_fixture('text', 'asnyk_zbior.xml'), provider=DirDocProvider(get_fixture('text', '')) - ).as_epub(flags=['without_fonts']).get_file() - zipf = ZipFile(epub) + ).as_epub(cover=True, flags=['without_fonts']) + zipf = ZipFile(epub_file.get_file()) # Check contributor list. - last = zipf.open('OPS/last.html') + last = zipf.open('EPUB/last.xhtml') tree = html.parse(last) editors_attribution = False for par in tree.findall("//p"): @@ -33,6 +35,60 @@ def test_transform(): u'Adam Fikcyjny, Aleksandra Sekuła, Olga Sutkowska.') assert_true(editors_attribution) + # Check that we have a valid EPUB. + assert_equal( + subprocess.call([ + 'epubcheck', '-quiet', epub_file.get_filename() + ]), + 0 + ) + + book = epub.read_epub(epub_file.get_filename()) + + # Check that guide items are there. + assert_equals( + book.guide, + [ + {'href': 'part1.xhtml', 'title': 'Początek', 'type': 'text'}, + {'href': 'cover.xhtml', 'title': 'Okładka', 'type': 'cover'} + ] + ) + + # Check that metadata is there. + DC = "http://purl.org/dc/elements/1.1/" + OPF = "http://www.idpf.org/2007/opf" + + assert_equals( + book.get_metadata(OPF, "cover"), + [(None, {'name': 'cover', 'content': 'cover-img'})] + ) + assert_equals( + book.get_metadata(DC, "title"), + [('Poezye', {})] + ) + assert_equals( + book.get_metadata(DC, "language"), + [('pl', {})] + ) + assert_equals( + book.get_metadata(DC, "identifier"), + [('http://wolnelektury.pl/katalog/lektura/poezye', { + 'id': 'id', + })] + ) + assert_equals( + book.get_metadata(DC, "creator"), + [('Adam Asnyk', {"id": "creator"})] + ) + assert_equals( + book.get_metadata(DC, "publisher"), + [('Fundacja Nowoczesna Polska', {})] + ) + assert_equals( + book.get_metadata(DC, "date"), + [("2007-09-06", {})] + ) + def test_transform_hyphenate(): epub = WLDocument.from_file( -- 2.20.1