From: Radek Czajka Date: Mon, 5 Jul 2010 11:48:18 +0000 (+0200) Subject: initial epub support X-Git-Tag: 1.7~284 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/4edcff3a9e2a4bb2ecb7ab228e5bf37dd28d7e14?ds=sidebyside initial epub support --- diff --git a/librarian/epub.py b/librarian/epub.py new file mode 100644 index 0000000..50f9562 --- /dev/null +++ b/librarian/epub.py @@ -0,0 +1,359 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright Â© Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from __future__ import with_statement + +import os +import os.path +import shutil +import sys +from copy import deepcopy +from lxml import etree +import zipfile + +from librarian import XMLNamespace, RDFNS, DCNS, WLNS, XHTMLNS + +NCXNS = XMLNamespace("http://www.daisy.org/z3986/2005/ncx/") +OPFNS = XMLNamespace("http://www.idpf.org/2007/opf") + + +def inner_xml(node): + """ returns node's text and children as a string + + >>> print inner_xml(etree.fromstring('xyz')) + xyz + """ + + nt = node.text if node.text is not None else '' + return ''.join([nt] + [etree.tostring(child) for child in node]) + +def set_inner_xml(node, text): + """ sets node's text and children from a string + + >>> e = etree.fromstring('bxx') + >>> set_inner_xml(e, 'xyz') + >>> print etree.tostring(e) + xyz + """ + + + p = etree.fromstring('%s' % text) + node.text = p.text + node[:] = p[:] + + +def node_name(node): + """ Find out a node's name + + >>> print node_name(etree.fromstring('XYZ')) + XYZ + """ + + tempnode = deepcopy(node) + + for p in ('pe', 'pa', 'pt', 'pr', 'motyw'): + for e in tempnode.findall('.//%s' % p): + t = e.tail + e.clear() + e.tail = t + etree.strip_tags(tempnode, '*') + return tempnode.text + + +def xslt(xml, sheet): + if isinstance(xml, etree._Element): + xml = etree.ElementTree(xml) + with open(sheet) as xsltf: + return xml.xslt(etree.parse(xsltf)) + + +_resdir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'epub') +def res(fname): + return os.path.join(_resdir, fname) + + +def replace_characters(node): + def replace_chars(text): + if text is None: + return None + return text.replace("&", "&")\ + .replace("---", "—")\ + .replace("--", "–")\ + .replace(",,", "„")\ + .replace('"', "”")\ + .replace("'", "’") + if node.tag == 'extra': + node.clear() + else: + node.text = replace_chars(node.text) + node.tail = replace_chars(node.tail) + for child in node: + replace_characters(child) + + +def find_annotations(annotations, source, part_number): + for child in source: + if child.tag in ('pe', 'pa', 'pt', 'pr'): + annotation = deepcopy(child) + annotation.set('number', str(len(annotations)+1)) + annotation.set('part', str(part_number)) + annotation.tail = '' + annotations.append(annotation) + tail = child.tail + child.clear() + child.tail = tail + child.text = str(len(annotations)) + if child.tag not in ('extra', 'podtytul'): + find_annotations(annotations, child, part_number) + + +def replace_by_verse(tree): + """ Find stanzas and create new verses in place of a '/' character """ + stanzas = tree.findall('.//' + WLNS('strofa')) + for node in stanzas: + for child_node in node: + if child_node.tag in ('slowo_obce', 'wyroznienie'): + foreign_verses = inner_xml(child_node).split('/\n') + if len(foreign_verses) > 1: + new_foreign = '' + for foreign_verse in foreign_verses: + if foreign_verse.startswith('', foreign_verse, '')) + set_inner_xml(child_node, new_foreign) + verses = inner_xml(node).split('/\n') + if len(verses) > 1: + modified_inner_xml = '' + for verse in verses: + if verse.startswith('', verse, '')) + set_inner_xml(node, modified_inner_xml) + + +def add_to_manifest(manifest, partno): + """ Adds a node to the manifest section in content.opf file """ + partstr = 'part%d' % partno + e = manifest.makeelement(OPFNS('item'), attrib={ + 'id': partstr, + 'href': partstr + '.html', + 'media-type': 'application/xhtml+xml', + }) + manifest.append(e) + + +def add_to_spine(spine, partno): + """ Adds a node to the spine section in content.opf file """ + e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno}); + spine.append(e) + + +def add_nav_point(nav_map, counter, title, part_counter): + nav_point = nav_map.makeelement(NCXNS('navPoint')) + nav_point.set('id', 'NavPoint-%d' % counter) + nav_point.set('playOrder', str(counter)) + + nav_label = nav_map.makeelement(NCXNS('navLabel')) + text = nav_map.makeelement(NCXNS('text')) + text.text = title + nav_label.append(text) + nav_point.append(nav_label) + + content = nav_map.makeelement(NCXNS('content')) + content.set('src', 'part%d.html' % part_counter) + nav_point.append(content) + + nav_map.append(nav_point) + + +def add_nav_point2(nav_map, counter, title, part_counter, subcounter): + nav_point = nav_map.makeelement(NCXNS('navPoint')) + nav_point.set('id', 'NavPoint-%d' % counter) + nav_point.set('playOrder', str(counter)) + + nav_label = nav_map.makeelement(NCXNS('navLabel')) + text = nav_map.makeelement(NCXNS('text')) + text.text = title + nav_label.append(text) + nav_point.append(nav_label) + + content = nav_map.makeelement(NCXNS('content')) + content.set('src', 'part%d.html#sub%d' % (part_counter, subcounter)) + nav_point.append(content) + + nav_map[-1].append(nav_point) + + +def transform(input_file, output_file): + """ produces an epub + + input_file and output_file should be filelike objects + """ + + input_xml = etree.parse(input_file) + + zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) + + mime = zipfile.ZipInfo() + mime.filename = 'mimetype' + mime.compress_type = zipfile.ZIP_STORED + mime.extra = '' + zip.writestr(mime, 'application/epub+zip') + + zip.writestr('META-INF/container.xml', '' \ + '' \ + '') + + metadata_el = input_xml.find('.//'+RDFNS('Description')) + metadatasource = etree.ElementTree(metadata_el) + + opf = xslt(metadatasource, res('xsltContent.xsl')) + + manifest = opf.find('.//' + OPFNS('manifest')) + spine = opf.find('.//' + OPFNS('spine')) + + for fname in 'style.css', 'logo_wolnelektury.png': + zip.write(res(fname), os.path.join('OPS', fname)) + + annotations = etree.Element('annotations') + part_xml = etree.Element('utwor') + etree.SubElement(part_xml, 'master') + + toc_file = etree.fromstring('' \ + '' \ + '' \ + 'Strona tytuÅowa' \ + '' \ + 'PoczÄtek utworu' \ + '') + + main_xml_part = part_xml[0] # byÅo [0][0], master + nav_map = toc_file[-1] # byÅo [-1][-1] + depth = 1 # navmap + + if len(input_xml.getroot()) > 1: + # rdf before style master + main_text = input_xml.getroot()[1] + else: + # rdf in style master + main_text = input_xml.getroot()[0] + + replace_characters(main_text) + zip.writestr('OPS/title.html', + etree.tostring(xslt(input_xml, res('xsltTitle.xsl')), pretty_print=True)) + + # Search for table of contents elements and book division + + stupid_i = stupid_j = stupid_k = 1 + last_node_part = False + for one_part in main_text: + name = one_part.tag + if name in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"): + if name == "naglowek_czesc": + stupid_k = 1 + last_node_part = True + find_annotations(annotations, part_xml, stupid_j) + replace_by_verse(part_xml) + zip.writestr('OPS/part%d.html' % stupid_j, + etree.tostring(xslt(part_xml, res('xsltScheme.xsl')), pretty_print=True)) + main_xml_part[:] = [deepcopy(one_part)] + # add to manifest and spine + add_to_manifest(manifest, stupid_j) + add_to_spine(spine, stupid_j) + name_toc = node_name(one_part) + # build table of contents + # i+2 because of title page + add_nav_point(nav_map, stupid_i+2, name_toc, stupid_j + 1) + stupid_i += 1 + stupid_j += 1 + else: + if last_node_part: + main_xml_part.append(one_part) + last_node_part = False + name_toc = node_name(one_part) + add_nav_point(nav_map, stupid_i + 1, name_toc, stupid_j) + else: + stupid_k = 1 + find_annotations(annotations, part_xml, stupid_j) + replace_by_verse(part_xml) + zip.writestr('OPS/part%d.html' % stupid_j, + etree.tostring(xslt(part_xml, res('xsltScheme.xsl')), pretty_print=True)) + # start building a new part + main_xml_part[:] = [deepcopy(one_part)] + add_to_manifest(manifest, stupid_j) + add_to_spine(spine, stupid_j) + name_toc = node_name(one_part) + add_nav_point(nav_map, stupid_i + 2, name_toc, stupid_j + 1) # title page + stupid_j += 1 + stupid_i += 1 + else: + if name in ('naglowek_podrozdzial', 'naglowek_scena'): + depth = 2 + name_toc = node_name(one_part) + add_nav_point2(nav_map, stupid_i + 2, name_toc, stupid_j, stupid_k) + one_part.set('sub', str(stupid_k)) + stupid_k += 1 + stupid_i += 1 + main_xml_part.append(deepcopy(one_part)) + last_node_part = False + find_annotations(annotations, part_xml, stupid_j) + replace_by_verse(part_xml) + add_to_manifest(manifest, stupid_j) + add_to_spine(spine, stupid_j) + + zip.writestr('OPS/part%d.html' % stupid_j, + etree.tostring(xslt(part_xml, res('xsltScheme.xsl')), pretty_print=True)) + + # Last modifications in container files and EPUB creation + if len(annotations) > 0: + nav_map.append(etree.fromstring( + 'Przypisy'\ + '' % {'i':stupid_i+2})) + manifest.append(etree.fromstring( + '')) + spine.append(etree.fromstring( + '')) + replace_by_verse(annotations) + zip.writestr('OPS/annotations.html', etree.tostring( + xslt(annotations, res("xsltAnnotations.xsl")), pretty_print=True)) + + zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) + contents = [] + title = node_name(etree.ETXPath('.//'+DCNS('title'))(input_xml)[0]) + attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber" + for st in attributes: + meta = toc_file.makeelement(NCXNS('meta')) + meta.set('name', st) + meta.set('content', '0') + toc_file[0].append(meta) + toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl'))) + toc_file[0][1].set('content', str(depth)) + set_inner_xml(toc_file[1], ''.join(('', title, ''))) + zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True)) + zip.close() + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print >> sys.stderr, 'Usage: wl2epub [output file]' + sys.exit(1) + + input = sys.argv[1] + if len(sys.argv) > 2: + output = sys.argv[2] + else: + basename, ext = os.path.splitext(input) + output = basename + '.epub' + + wl2epub(open(input, 'r'), open(output, 'w')) + + + diff --git a/librarian/epub/logo_wolnelektury.png b/librarian/epub/logo_wolnelektury.png new file mode 100644 index 0000000..104d56a Binary files /dev/null and b/librarian/epub/logo_wolnelektury.png differ diff --git a/librarian/epub/style.css b/librarian/epub/style.css new file mode 100644 index 0000000..652d903 --- /dev/null +++ b/librarian/epub/style.css @@ -0,0 +1,317 @@ +body +{ + font-size: 12pt; + font: Georgia, "Times New Roman" , serif; + line-height: 1.5em; + margin: 0; +} + +a +{ + color: black; + text-decoration: none; +} + +#book-text +{ + margin: 2em; + /*margin-right: 9em;*/ +} + +/* =================================================== */ +/* = Common elements: headings, paragraphs and lines = */ +/* =================================================== */ + + +.h2 +{ + size: big; + font-size: 2em; + margin: 0; + margin-top: 1.5em; + font-weight: bold; + line-height: 1.5em; +} + +.h3 +{ + text-align:left; + font-size: 1.5em; + margin-top: 1.5em; + font-weight: normal; + line-height: 1.5em; +} + +.h4 +{ + font-size: 1em; + margin: 0; + margin-top: 1.5em; + line-height: 1.5em; +} + +p +{ + margin: 0; +} + +/* ======================== */ +/* = Footnotes and themes = */ +/* ======================== */ + +.theme-begin +{ + border-left: 0.1em solid #DDDDDD; + color: #777; + padding: 0 0.5em; + width: 7.5em; + font-style: normal; + font-weight: normal; + font-size: 0.875em; + float: right; + margin-right: -9.5em; + clear: both; + left: 40em; + line-height: 1.5em; + text-align: left; +} + +.annotation +{ + font-style: normal; + font-weight: normal; + font-size: 0.875em; +} + +#footnotes .annotation +{ + display: block; + float: left; + width: 2.5em; + clear: both; +} + +#footnotes div +{ + margin: 0; + margin-top: 1.5em; +} + +#footnotes p +{ + margin-left: 2.5em; + font-size: 0.875em; +} + +.block +{ + font-size: 0.875em; + padding: 1em; +} + +/* ============= */ +/* = Numbering = */ +/* ============= */ + +.anchor +{ + margin: -0.25em -0.5em; + color: #777; + font-size: 0.875em; + width: 2em; + text-align: center; + padding: 0.25em 0.5em; + line-height: 1.5em; +} + +/* =================== */ +/* = Custom elements = */ +/* =================== */ + +.title-page +{ + margin-top: 1.5em; +} + +.title +{ + font-size: 3em; + margin-bottom: 1.5em; + text-align: center; + line-height: 1.5em; + font-weight: bold; +} + +.author +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.5em; + line-height: 1.5em; + margin-bottom: 0.25em; +} + +.collection +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.125em; + line-height: 1.5em; + margin-bottom: -0.25em; +} + +.subtitle +{ + margin: 0; + text-align: center; + font-weight: bold; + + font-size: 1.5em; + line-height: 1.5em; + margin-top: -0.25em; +} + +div.didaskalia +{ + font-style: italic; + margin-top: 0.5em; + margin-left: 1.5em; +} + +div.kwestia +{ + margin-top: 0.5em; +} + +div.stanza +{ + margin-top: 1.5em; +} + +.paragraph +{ + text-align: justify; + margin-top: 1.5em; +} + +.motto +{ + text-align: justify; + font-style: italic; + margin-top: 1.5em; +} + +.motto_podpis +{ + font-size: 0.875em; + text-align: right; +} + +div.fragment +{ + border-bottom: 0.1em solid #999; + padding-bottom: 1.5em; +} + +div.note +{ + text-align: right; + font-style: italic; +} +div.note div.paragraph +{ + text-align: right; + font-style: italic; +} +div.dedication +{ + text-align: right; + font-style: italic; +} +div.dedication div.paragaph +{ + text-align: right; + font-style: italic; +} + + +hr.spacer +{ + height: 3em; + visibility: hidden; +} + +hr.spacer-line +{ + margin: 0; + margin-top: 1.5em; + margin-bottom: 1.5em; + border: none; + border-bottom: 0.1em solid #000; +} + +.spacer-asterisk +{ + padding: 0; + margin: 0; + margin-top: 1.5em; + margin-bottom: 1.5em; + text-align: center; +} + +div.person-list ol +{ + list-style: none; + padding: 0; + padding-left: 1.5em; +} + +.place-and-time +{ + font-style: italic; +} + +em.math +{ + font-style: italic; +} +em.foreign-word +{ + font-style: italic; +} +em.book-title +{ + font-style: italic; +} +em.didaskalia +{ + font-style: italic; +} + +em.author-emphasis +{ + letter-spacing: 0.1em; +} + +em.person +{ + font-style: normal; + font-variant: small-caps; +} + +p.info +{ + text-align: center; + margin-bottom: 1em; +} + +p.info img +{ + margin: 0; + margin-left: 2em; + margin-right: 2em; +} diff --git a/librarian/epub/xsltAnnotations.xsl b/librarian/epub/xsltAnnotations.xsl new file mode 100644 index 0000000..d0af8ab --- /dev/null +++ b/librarian/epub/xsltAnnotations.xsl @@ -0,0 +1,77 @@ + + + + + + + + + + + + + <xsl:text>Przypisy</xsl:text> + + + +

+ Przypisy: +

+ +

+ + + + + + + + + +

+ + [] + +

+ +

+ + + + + + + + + +

+ +

+ + + +

+ +

+ + + + + â + + â + + + + +

+ +

+ + + \ No newline at end of file diff --git a/librarian/epub/xsltContent.xsl b/librarian/epub/xsltContent.xsl new file mode 100644 index 0000000..548bcb7 --- /dev/null +++ b/librarian/epub/xsltContent.xsl @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/librarian/epub/xsltScheme.xsl b/librarian/epub/xsltScheme.xsl new file mode 100644 index 0000000..109b1cc --- /dev/null +++ b/librarian/epub/xsltScheme.xsl @@ -0,0 +1,342 @@ + + + + + + + + + + + + + WolneLektury.pl + + + + + book-text + + + + + + + + + + + + + + + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + + + + + +

+ +

+ + + + + +

+ +

+ + + + + +

+ +

+ + + + + +

+ +

+ + + + + +

+ +

+ + + +

+ +

+ + + + +

+ +

+ + + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + â + + â + + + + + + + + + + + + + + + + + + + + +

+ +

+ + + + + + + +

+ + + +

+ + + + + + + + + + + + + + + + + + + [] + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/librarian/epub/xsltTitle.xsl b/librarian/epub/xsltTitle.xsl new file mode 100644 index 0000000..c69db26 --- /dev/null +++ b/librarian/epub/xsltTitle.xsl @@ -0,0 +1,91 @@ + + + + + + + + + + + + + <xsl:text>Strona tytuÅowa</xsl:text> + + + +

+ + + + + + + + +

Publikacja zrealizowana w ramach projektu WolneLektury.pl

+ WolneLektury.pl +

+ + + + + + + + + + + + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + +

+ +

+ + + + + + + + + + + + + \ No newline at end of file