1 # -*- coding: utf-8 -*-
 
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 
   9 from copy import deepcopy
 
  10 from mimetypes import guess_type
 
  11 from tempfile import NamedTemporaryFile
 
  13 from urllib2 import urlopen
 
  15 from lxml import etree
 
  16 from librarian import OPFNS, NCXNS, XHTMLNS, DCNS
 
  17 from librarian import core
 
  18 from librarian.formats import Format
 
  19 from librarian.formats.cover.evens import EvensCover
 
  20 from librarian.output import OutputFile
 
  21 from librarian.renderers import Register, TreeRenderer, UnknownElement
 
  22 from librarian.utils import Context, get_resource, extend_element
 
  25 class EpubFormat(Format):
 
  30     renderers = Register()
 
  32     def __init__(self, doc, cover=None, with_fonts=True):
 
  33         super(EpubFormat, self).__init__(doc)
 
  34         self.with_fonts = with_fonts
 
  39         return self.doc.meta.get_one(DCNS(tag))
 
  41     def build(self, ctx=None):
 
  43         def add_file(url, file_id):
 
  44             filename = url.rsplit('/', 1)[1]
 
  45             if url.startswith('file://'):
 
  46                 url = ctx.files_path + urllib.quote(url[7:])
 
  47             if url.startswith('/'):
 
  48                 url = 'http://milpeer.eu' + url
 
  49             file_content = urlopen(url).read()
 
  50             zip.writestr(os.path.join('OPS', filename), file_content)
 
  51             manifest.append(etree.fromstring(
 
  52                 '<item id="%s" href="%s" media-type="%s" />' % (file_id, filename, guess_type(url)[0])))
 
  54         opf = etree.parse(get_resource('formats/epub/res/content.opf'))
 
  55         manifest = opf.find(OPFNS('manifest'))
 
  56         guide = opf.find(OPFNS('guide'))
 
  57         spine = opf.find(OPFNS('spine'))
 
  59         author = ", ". join(self.doc.meta.get(DCNS('creator')) or [])
 
  60         title = self.doc.meta.title()
 
  61         opf.find('.//' + DCNS('creator')).text = author
 
  62         opf.find('.//' + DCNS('title')).text = title
 
  64         output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 
  65         zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 
  67         mime = zipfile.ZipInfo()
 
  68         mime.filename = 'mimetype'
 
  69         mime.compress_type = zipfile.ZIP_STORED
 
  71         zip.writestr(mime, 'application/epub+zip')
 
  72         zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
 
  73                      'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 
  74                      '<rootfiles><rootfile full-path="OPS/content.opf" '
 
  75                      'media-type="application/oebps-package+xml" />'
 
  76                      '</rootfiles></container>')
 
  78         toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 
  79                                     '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 
  80                                     '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 
  81                                     'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 
  83         # nav_map = toc_file[-1]
 
  85         if self.cover is not None:
 
  86             # cover_image = self.doc.meta.get(DCNS('relation.coverimage.url'))[0]
 
  87             cover = self.cover(self.doc)
 
  89             cover_output = cover.build()
 
  90             cover_name = 'cover.%s' % cover.format_ext
 
  91             zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
 
  94             cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
 
  95             cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 
  96             zip.writestr('OPS/cover.html', etree.tostring(
 
  97                             cover_tree, method="html", pretty_print=True))
 
  99             if cover.uses_dc_cover:
 
 100                 if self.doc.meta.get_one('cover_by'):
 
 101                     self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
 
 102                 if self.doc.meta.get_one('cover_source'):
 
 103                     self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))
 
 105             manifest.append(etree.fromstring(
 
 106                 '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 
 107             manifest.append(etree.fromstring(
 
 108                 '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
 
 109             spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
 
 110             opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 
 111             guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 
 114             ctx = Context(format=self)
 
 119         ctx.footnotes = Footnotes()
 
 123         wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
 
 124         for e in self.render(self.doc.edoc.getroot(), ctx):
 
 125             if not len(e) and not (e.text and e.text.strip()):
 
 127             wrap = deepcopy(wrap_tmpl)
 
 128             extend_element(wrap.find('//*[@id="book-text"]'), e)
 
 130             partstr = 'part%d' % int(e.get('part_no'))
 
 131             manifest.append(manifest.makeelement(OPFNS('item'), attrib={
 
 133                                  'href': partstr + ".html",
 
 134                                  'media-type': 'application/xhtml+xml',
 
 136             spine.append(spine.makeelement(OPFNS('itemref'), attrib={
 
 139             zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
 
 141         for i, url in enumerate(ctx.images):
 
 142             add_file(url, 'image%s' % i)
 
 144         if len(ctx.footnotes.output):
 
 145             ctx.toc.add("Przypisy", "footnotes.html")
 
 146             manifest.append(etree.Element(
 
 147                 OPFNS('item'), id='footnotes', href='footnotes.html',
 
 148                 **{'media-type': "application/xhtml+xml"}))
 
 149             spine.append(etree.Element('itemref', idref='footnotes'))
 
 150             wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
 
 151             extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
 
 153             # chars = chars.union(used_chars(html_tree.getroot()))
 
 154             zip.writestr('OPS/footnotes.html', etree.tostring(
 
 155                                 wrap, method="html", pretty_print=True))
 
 158             'Information about the resource',
 
 159             'Publisher: %s' % self.dc('publisher'),
 
 160             'Rights: %s' % self.dc('rights'),
 
 161             'Intended audience: %s' % self.dc('audience'),
 
 162             self.dc('description'),
 
 163             'Resource prepared using MIL/PEER editing platform.',
 
 164             'Source available at %s' % ctx.source_url,
 
 166         footer_wrap = deepcopy(wrap_tmpl)
 
 167         footer_body = footer_wrap.find('//*[@id="book-text"]')
 
 168         for line in footer_text:
 
 169             footer_line = etree.Element('p')
 
 170             footer_line.text = line
 
 171             footer_body.append(footer_line)
 
 172         manifest.append(manifest.makeelement(OPFNS('item'), attrib={
 
 174             'href': "footer.html",
 
 175             'media-type': 'application/xhtml+xml',
 
 177         spine.append(spine.makeelement(OPFNS('itemref'), attrib={
 
 180         zip.writestr('OPS/footer.html', etree.tostring(footer_wrap, method='html'))
 
 182         zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
 
 183         ctx.toc.render(toc_file[-1])
 
 184         zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
 
 186         return OutputFile.from_filename(output_file.name)
 
 188     def render(self, element, ctx):
 
 189         return self.renderers.get_for(element).render(element, ctx)
 
 194 class EpubRenderer(TreeRenderer):
 
 195     """ Renders insides as XML in a <_/> container. """
 
 196     def container(self, ctx):
 
 197         root, inner = super(EpubRenderer, self).container()
 
 198         root.set("part_no", str(ctx.part_no))
 
 201     def render(self, element, ctx):
 
 202         subctx = self.subcontext(element, ctx)
 
 203         wrapper, inside = self.container(ctx)
 
 205             extend_element(inside, self.render_text(element.text, ctx))
 
 206         for child in element:
 
 208                 child_renderer = ctx.format.renderers.get_for(child)
 
 209             except UnknownElement:
 
 212                 if getattr(child_renderer, 'epub_separate', False):
 
 215                     for child_part in child_renderer.render(child, subctx):
 
 217                     wrapper, inside = self.container(ctx)
 
 219                     child_parts = list(child_renderer.render(child, subctx))
 
 220                     extend_element(inside, child_parts[0])
 
 221                     if len(child_parts) > 1:
 
 223                         for child_part in child_parts[1:-1]:
 
 225                         wrapper, inside = self.container(ctx)
 
 226                         extend_element(inside, child_parts[-1])
 
 229                     extend_element(inside, self.render_text(child.tail, ctx))
 
 233 class NaturalText(EpubRenderer):
 
 234     def render_text(self, text, ctx):
 
 235         root, inner = self.text_container()
 
 236         chunks = re.split('(?<=\s\w) ', text)
 
 237         inner.text = chunks[0]
 
 238         for chunk in chunks[1:]:
 
 239             x = etree.Entity("nbsp")
 
 245 class Silent(EpubRenderer):
 
 246     def render_text(self, text, ctx):
 
 247         root, inner = self.text_container()
 
 251 class Footnotes(object):
 
 254         self.output = etree.Element("_")
 
 256     def append(self, items):
 
 259             "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter),
 
 260             id="footnote-%d" % self.counter,
 
 261             style="float:left;margin-right:1em")
 
 262         e.text = "[%d]" % self.counter
 
 264         self.output.append(e)
 
 266             extend_element(self.output, item)
 
 267         anchor = etree.Element(
 
 268             "a", href="footnotes.html#footnote-%d" % self.counter, id="footnote-anchor-%d" % self.counter)
 
 269         anchor.text = "[%d]" % self.counter
 
 274     def __init__(self, title=None, href="", root=None):
 
 282         self.href = href.format(counter=self.root.counter)
 
 283         self.number = self.root.counter
 
 284         self.root.counter += 1
 
 286     def add(self, title, href):
 
 287         subtoc = type(self)(title, href, root=self.root)
 
 288         self.children.append(subtoc)
 
 291     def render(self, nav_map):
 
 292         for child in self.children:
 
 293             nav_point = etree.Element(NCXNS('navPoint'))
 
 294             nav_point.set('id', 'NavPoint-%d' % child.number)
 
 295             nav_point.set('playOrder', str(child.number))
 
 297             nav_label = etree.Element(NCXNS('navLabel'))
 
 298             text = etree.Element(NCXNS('text'))
 
 299             text.text = child.title
 
 300             nav_label.append(text)
 
 301             nav_point.append(nav_label)
 
 303             content = etree.Element(NCXNS('content'))
 
 304             content.set('src', child.href)
 
 305             nav_point.append(content)
 
 306             nav_map.append(nav_point)
 
 307             child.render(nav_point)
 
 312 class AsideR(NaturalText):
 
 313     def render(self, element, ctx):
 
 314         outputs = list(super(AsideR, self).render(element, ctx))
 
 315         anchor = ctx.footnotes.append(outputs)
 
 316         wrapper, inside = self.text_container()  # etree.Element('_', part_no=str(ctx.part_no))
 
 317         inside.append(anchor)
 
 319 EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
 
 321 EpubFormat.renderers.register(core.Aside, 'comment', Silent())
 
 324 class DivR(NaturalText):
 
 325     def container(self, ctx):
 
 326         root, inner = super(DivR, self).container(ctx)
 
 327         if getattr(ctx, 'inline', False):
 
 329             inner.set('style', 'display: block;')
 
 331 EpubFormat.renderers.register(core.Div, None, DivR('div'))
 
 332 EpubFormat.renderers.register(core.Div, 'p', NaturalText('p'))
 
 334 EpubFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
 
 335 EpubFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
 
 336 EpubFormat.renderers.register(core.Div, 'item', NaturalText('li'))
 
 339 class DivImageR(EpubRenderer):
 
 340     def render(self, element, ctx):
 
 341         src = element.attrib.get('src', '')
 
 342         ctx.images.append(src)
 
 343         src = src.rsplit('/', 1)[1]
 
 344         return super(DivImageR, self).render(element, Context(ctx, src=src))
 
 346     def container(self, ctx):
 
 347         root, inner = super(DivImageR, self).container(ctx)
 
 348         src = getattr(ctx, 'src', '')
 
 349         inner.set('src', src)
 
 350         # inner.set('style', 'display: block; width: 60%; margin: 3em auto')
 
 352 EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
 
 355 class DivVideoR(Silent):
 
 356     def render(self, element, ctx):
 
 357         src = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid', '')
 
 358         return super(DivVideoR, self).render(element, Context(ctx, src=src))
 
 360     def container(self, ctx):
 
 361         root, inner = super(DivVideoR, self).container(ctx)
 
 362         src = getattr(ctx, 'src', '')
 
 363         link = etree.Element('a', {'href': src})
 
 367 EpubFormat.renderers.register(core.Div, 'video', DivVideoR('p'))
 
 370 class HeaderR(NaturalText):
 
 371     def subcontext(self, element, ctx):
 
 372         return Context(ctx, inline=True)
 
 373 EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
 
 376 class SectionR(NaturalText):
 
 379     def render(self, element, ctx):
 
 381         if element.getparent() is not None:
 
 382             tocitem = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
 
 383             ctx = Context(ctx, toc=tocitem)
 
 384         return super(SectionR, self).render(element, ctx)
 
 385 EpubFormat.renderers.register(core.Section, None, SectionR())
 
 388 class SpanR(NaturalText):
 
 390 EpubFormat.renderers.register(core.Span, None, SpanR('span'))
 
 391 EpubFormat.renderers.register(core.Span, 'cite', SpanR('i'))
 
 392 EpubFormat.renderers.register(core.Span, 'emp', SpanR('b'))
 
 393 EpubFormat.renderers.register(core.Span, 'emph', SpanR('i'))
 
 396 class SpanLink(EpubRenderer):
 
 397     def render(self, element, ctx):
 
 398         parts = super(SpanLink, self).render(element, ctx)
 
 400             src = element.attrib.get('href', '')
 
 401             if src.startswith('file://'):
 
 402                 src = ctx.files_path + src[7:]
 
 403             part[0].attrib['href'] = src
 
 405 EpubFormat.renderers.register(core.Span, 'link', SpanLink('a'))