X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/13480b3da2d3da87f1d99c6d340c1553ca9d89c1..0560b4a83f947a4d08f087d85759f05516f6e580:/librarian/formats/epub/__init__.py diff --git a/librarian/formats/epub/__init__.py b/librarian/formats/epub/__init__.py index f9f7565..36891be 100644 --- a/librarian/formats/epub/__init__.py +++ b/librarian/formats/epub/__init__.py @@ -4,14 +4,19 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import os +import re +import urllib from copy import deepcopy +from mimetypes import guess_type from tempfile import NamedTemporaryFile import zipfile +from urllib2 import urlopen + from lxml import etree -from librarian import OPFNS, NCXNS, XHTMLNS +from librarian import OPFNS, NCXNS, XHTMLNS, DCNS, BuildError from librarian import core from librarian.formats import Format -from librarian.formats.cover.wolnelektury import WLCover +from librarian.formats.cover.evens import EvensCover from librarian.output import OutputFile from librarian.renderers import Register, TreeRenderer, UnknownElement from librarian.utils import Context, get_resource, extend_element @@ -21,7 +26,7 @@ class EpubFormat(Format): format_name = 'EPUB' format_ext = 'epub' - cover = WLCover + cover = EvensCover renderers = Register() def __init__(self, doc, cover=None, with_fonts=True): @@ -30,12 +35,35 @@ class EpubFormat(Format): if cover is not None: self.cover = cover - def build(self): + def dc(self, tag, multiple=False): + if multiple: + return ', '.join(self.doc.meta.get(DCNS(tag))) + else: + return self.doc.meta.get_one(DCNS(tag)) + + def build(self, ctx=None): + + def add_file(url, file_id): + filename = url.rsplit('/', 1)[1] + if url.startswith('file://'): + url = ctx.files_path + urllib.quote(url[7:]) + if url.startswith('/'): + url = 'http://milpeer.eu' + url + file_content = urlopen(url).read() + zip.writestr(os.path.join('OPS', filename), file_content) + manifest.append(etree.fromstring( + '' % (file_id, filename, guess_type(url)[0]))) + opf = etree.parse(get_resource('formats/epub/res/content.opf')) manifest = opf.find(OPFNS('manifest')) guide = opf.find(OPFNS('guide')) spine = opf.find(OPFNS('spine')) + author = ", ". join(self.doc.meta.get(DCNS('creator')) or []) + title = self.doc.meta.title() + opf.find('.//' + DCNS('creator')).text = author + opf.find('.//' + DCNS('title')).text = title + output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False) zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) @@ -44,21 +72,23 @@ class EpubFormat(Format): mime.compress_type = zipfile.ZIP_STORED mime.extra = '' zip.writestr(mime, 'application/epub+zip') - zip.writestr('META-INF/container.xml', '' \ - '' \ - '') - - toc_file = etree.fromstring('' \ - '' \ - '') - nav_map = toc_file[-1] + zip.writestr('META-INF/container.xml', '' + '' + '') + + toc_file = etree.fromstring('' + '' + '') + # nav_map = toc_file[-1] if self.cover is not None: + # cover_image = self.doc.meta.get(DCNS('relation.coverimage.url'))[0] cover = self.cover(self.doc) + cover.set_images(ctx) cover_output = cover.build() cover_name = 'cover.%s' % cover.format_ext zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string()) @@ -71,9 +101,9 @@ class EpubFormat(Format): if cover.uses_dc_cover: if self.doc.meta.get_one('cover_by'): - document.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by')) + self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by')) if self.doc.meta.get_one('cover_source'): - document.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source')) + self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source')) manifest.append(etree.fromstring( '')) @@ -83,16 +113,19 @@ class EpubFormat(Format): opf.getroot()[0].append(etree.fromstring('')) guide.append(etree.fromstring('')) - - ctx = Context(format=self) + if not ctx: + ctx = Context(format=self) + else: + ctx.format = self ctx.toc = TOC() ctx.toc_level = 0 ctx.footnotes = Footnotes() + ctx.images = [] ctx.part_no = 0 wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html')) for e in self.render(self.doc.edoc.getroot(), ctx): - if not len(e) and not e.text.strip(): + if not len(e) and not (e.text and e.text.strip()): continue wrap = deepcopy(wrap_tmpl) extend_element(wrap.find('//*[@id="book-text"]'), e) @@ -108,19 +141,46 @@ class EpubFormat(Format): })) zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html')) + for i, url in enumerate(ctx.images): + add_file(url, 'image%s' % i) + if len(ctx.footnotes.output): ctx.toc.add("Przypisy", "footnotes.html") - manifest.append(etree.Element(OPFNS('item'), - id='footnotes', href='footnotes.html', - **{'media-type': "application/xhtml+xml"})) + manifest.append(etree.Element( + OPFNS('item'), id='footnotes', href='footnotes.html', + **{'media-type': "application/xhtml+xml"})) spine.append(etree.Element('itemref', idref='footnotes')) wrap = etree.parse(get_resource('formats/epub/res/footnotes.html')) extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output) - #chars = chars.union(used_chars(html_tree.getroot())) + # chars = chars.union(used_chars(html_tree.getroot())) zip.writestr('OPS/footnotes.html', etree.tostring( wrap, method="html", pretty_print=True)) + footer_text = [ + 'Information about the resource', + 'Publisher: %s' % self.dc('publisher'), + 'Rights: %s' % self.dc('rights'), + 'Intended audience: %s' % self.dc('audience', multiple=True), + self.dc('description'), + 'Resource prepared using MIL/PEER editing platform.', + 'Source available at %s' % ctx.source_url, + ] + footer_wrap = deepcopy(wrap_tmpl) + footer_body = footer_wrap.find('//*[@id="book-text"]') + for line in footer_text: + footer_line = etree.Element('p') + footer_line.text = line + footer_body.append(footer_line) + manifest.append(manifest.makeelement(OPFNS('item'), attrib={ + 'id': 'footer', + 'href': "footer.html", + 'media-type': 'application/xhtml+xml', + })) + spine.append(spine.makeelement(OPFNS('itemref'), attrib={ + 'idref': 'footer', + })) + zip.writestr('OPS/footer.html', etree.tostring(footer_wrap, method='html')) zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) ctx.toc.render(toc_file[-1]) @@ -173,6 +233,24 @@ class EpubRenderer(TreeRenderer): yield wrapper +class NaturalText(EpubRenderer): + def render_text(self, text, ctx): + root, inner = self.text_container() + chunks = re.split('(?<=\s\w) ', text) + inner.text = chunks[0] + for chunk in chunks[1:]: + x = etree.Entity("nbsp") + x.tail = chunk + inner.append(x) + return root + + +class Silent(EpubRenderer): + def render_text(self, text, ctx): + root, inner = self.text_container() + return root + + class Footnotes(object): def __init__(self): self.counter = 0 @@ -180,8 +258,8 @@ class Footnotes(object): def append(self, items): self.counter += 1 - e = etree.Element("a", - href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter), + e = etree.Element( + "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter), id="footnote-%d" % self.counter, style="float:left;margin-right:1em") e.text = "[%d]" % self.counter @@ -189,9 +267,8 @@ class Footnotes(object): self.output.append(e) for item in items: extend_element(self.output, item) - anchor = etree.Element("a", - id="footnote-anchor-%d" % self.counter, - href="footnotes.html#footnote-%d" % self.counter) + anchor = etree.Element( + "a", href="footnotes.html#footnote-%d" % self.counter, id="footnote-anchor-%d" % self.counter) anchor.text = "[%d]" % self.counter return anchor @@ -230,22 +307,24 @@ class TOC(object): content.set('src', child.href) nav_point.append(content) nav_map.append(nav_point) - child.render(nav_map) + child.render(nav_point) # Renderers -class AsideR(EpubRenderer): +class AsideR(NaturalText): def render(self, element, ctx): outputs = list(super(AsideR, self).render(element, ctx)) anchor = ctx.footnotes.append(outputs) - wrapper, inside = self.text_container() #etree.Element('_', part_no=str(ctx.part_no)) + wrapper, inside = self.text_container() # etree.Element('_', part_no=str(ctx.part_no)) inside.append(anchor) yield wrapper EpubFormat.renderers.register(core.Aside, None, AsideR('div')) +EpubFormat.renderers.register(core.Aside, 'comment', Silent()) + -class DivR(EpubRenderer): +class DivR(NaturalText): def container(self, ctx): root, inner = super(DivR, self).container(ctx) if getattr(ctx, 'inline', False): @@ -253,15 +332,54 @@ class DivR(EpubRenderer): inner.set('style', 'display: block;') return root, inner EpubFormat.renderers.register(core.Div, None, DivR('div')) +EpubFormat.renderers.register(core.Div, 'p', NaturalText('p')) + +EpubFormat.renderers.register(core.Div, 'list', NaturalText('ul')) +EpubFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol')) +EpubFormat.renderers.register(core.Div, 'item', NaturalText('li')) +EpubFormat.renderers.register(core.Span, 'item', NaturalText('li')) -class HeaderR(EpubRenderer): +class DivImageR(EpubRenderer): + def render(self, element, ctx): + src = element.attrib.get('src', '') + ctx.images.append(src) + if '/' not in src: + raise BuildError('Bad image URL') + src = src.rsplit('/', 1)[1] + return super(DivImageR, self).render(element, Context(ctx, src=src)) + + def container(self, ctx): + root, inner = super(DivImageR, self).container(ctx) + src = getattr(ctx, 'src', '') + inner.set('src', src) + # inner.set('style', 'display: block; width: 60%; margin: 3em auto') + return root, inner +EpubFormat.renderers.register(core.Div, 'img', DivImageR('img')) + + +class DivVideoR(Silent): + def render(self, element, ctx): + src = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid', '') + return super(DivVideoR, self).render(element, Context(ctx, src=src)) + + def container(self, ctx): + root, inner = super(DivVideoR, self).container(ctx) + src = getattr(ctx, 'src', '') + link = etree.Element('a', {'href': src}) + link.text = src + inner.append(link) + return root, inner +EpubFormat.renderers.register(core.Div, 'video', DivVideoR('p')) + + +class HeaderR(NaturalText): def subcontext(self, element, ctx): return Context(ctx, inline=True) EpubFormat.renderers.register(core.Header, None, HeaderR('h1')) -class SectionR(EpubRenderer): +class SectionR(NaturalText): epub_separate = True def render(self, element, ctx): @@ -273,7 +391,21 @@ class SectionR(EpubRenderer): EpubFormat.renderers.register(core.Section, None, SectionR()) -class SpanR(EpubRenderer): +class SpanR(NaturalText): pass EpubFormat.renderers.register(core.Span, None, SpanR('span')) +EpubFormat.renderers.register(core.Span, 'cite', SpanR('i')) +EpubFormat.renderers.register(core.Span, 'emp', SpanR('b')) +EpubFormat.renderers.register(core.Span, 'emph', SpanR('i')) + +class SpanLink(EpubRenderer): + def render(self, element, ctx): + parts = super(SpanLink, self).render(element, ctx) + for part in parts: + src = element.attrib.get('href', '') + if src.startswith('file://'): + src = ctx.files_path + src[7:] + part[0].attrib['href'] = src + yield part +EpubFormat.renderers.register(core.Span, 'link', SpanLink('a'))