1 # -*- coding: utf-8 -*-
 
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 
   9 from copy import deepcopy
 
  10 from mimetypes import guess_type
 
  11 from tempfile import NamedTemporaryFile
 
  13 from urllib2 import urlopen
 
  15 from lxml import etree
 
  16 from librarian import OPFNS, NCXNS, XHTMLNS, DCNS, BuildError, VIDEO_PROVIDERS
 
  17 from librarian import core
 
  18 from librarian.formats import Format
 
  19 from librarian.formats.cover.evens import EvensCover
 
  20 from librarian.output import OutputFile
 
  21 from librarian.renderers import Register, TreeRenderer, UnknownElement
 
  22 from librarian.utils import Context, get_resource, extend_element
 
  25 class EpubFormat(Format):
         # Output format that packs a librarian document into an EPUB archive;
         # the work is done in build().
  30     renderers = Register()
         # Class-level register: render() looks element renderers up here and the
         # module-level EpubFormat.renderers.register(...) calls fill it.
  32     def __init__(self, doc, cover=None, with_fonts=True):
             # `doc` is passed to the Format initializer; `with_fonts` is stored
             # unchanged on the instance.
  33         super(EpubFormat, self).__init__(doc)
  34         self.with_fonts = with_fonts
             # NOTE(review): `cover` is not used in the lines visible here, yet
             # build() reads self.cover. The listing numbers embedded in these
             # lines jump from 34 to 38, so the assignment is presumably in lines
             # elided from this view -- TODO confirm.
 
  38     def dc(self, tag, multiple=False):
 
  40             return ', '.join(self.doc.meta.get(DCNS(tag)))
 
  42             return self.doc.meta.get_one(DCNS(tag))
 
  44     def build(self, ctx=None):
             # Assemble the EPUB: container files, optional cover, one HTML file
             # per rendered part, referenced images, footnotes, a footer page,
             # the OPF package and the NCX table of contents are written into a
             # temporary zip file, which is returned wrapped in an OutputFile.
             # NOTE(review): the listing numbers embedded in these lines skip
             # values, so some statements of this method are not visible here;
             # the notes below flag the places where that matters.
             # NOTE(review): `os`, `urllib` and `zipfile` are used below but
             # their imports are not among the import lines visible at the top
             # of the file -- TODO confirm they exist.
  46         def add_file(url, file_id):
                 # Download `url`, store it as OPS/<last path segment> in the
                 # archive and list it in the manifest under `file_id`. Uses
                 # `ctx`, `zip` and `manifest` from the enclosing scope.
                 # NOTE(review): a url without '/' raises IndexError on the
                 # next line.
  47             filename = url.rsplit('/', 1)[1]
                 # file:// URLs are rebased onto ctx.files_path (path part
                 # quoted); URLs starting with '/' get the milpeer.eu host.
  48             if url.startswith('file://'):
  49                 url = ctx.files_path + urllib.quote(url[7:])
  50             if url.startswith('/'):
  51                 url = 'http://milpeer.eu' + url
                 # NOTE(review): the response object is never closed explicitly.
  52             file_content = urlopen(url).read()
  53             zip.writestr(os.path.join('OPS', filename), file_content)
                 # NOTE(review): values are interpolated into the XML string
                 # without escaping, and guess_type() yields None for unknown
                 # types, which would be written as media-type="None".
  54             manifest.append(etree.fromstring(
  55                 '<item id="%s" href="%s" media-type="%s" />' % (file_id, filename, guess_type(url)[0])))
             # Load the OPF template and grab the sections that get extended.
  57         opf = etree.parse(get_resource('formats/epub/res/content.opf'))
  58         manifest = opf.find(OPFNS('manifest'))
  59         guide = opf.find(OPFNS('guide'))
  60         spine = opf.find(OPFNS('spine'))
             # Fill creator and title in the template from document metadata;
             # a missing creator list becomes an empty string.
  62         author = ", ". join(self.doc.meta.get(DCNS('creator')) or [])
  63         title = self.doc.meta.title()
  64         opf.find('.//' + DCNS('creator')).text = author
  65         opf.find('.//' + DCNS('title')).text = title
             # delete=False: the temporary file must outlive this call because
             # its name is handed to OutputFile at the end.
  67         output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
             # Note: the local name `zip` shadows the builtin inside build().
  68         zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
             # The 'mimetype' entry is written first and stored uncompressed,
             # as the EPUB container format expects.
  70         mime = zipfile.ZipInfo()
  71         mime.filename = 'mimetype'
  72         mime.compress_type = zipfile.ZIP_STORED
  74         zip.writestr(mime, 'application/epub+zip')
             # container.xml points readers at OPS/content.opf.
  75         zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
  76                      'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
  77                      '<rootfiles><rootfile full-path="OPS/content.opf" '
  78                      'media-type="application/oebps-package+xml" />'
  79                      '</rootfiles></container>')
             # Skeleton NCX document; the TOC is rendered into its last child at
             # the end of build().
             # NOTE(review): the end of this string literal and the closing
             # parenthesis are not visible in this view.
  81         toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
  82                                     '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
  83                                     '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
  84                                     'version="2005-1"><head></head><docTitle></docTitle><navMap>'
  86         # nav_map = toc_file[-1]
             # Optional cover: build the image, wrap it in cover.html and
             # register both in manifest, spine, metadata and guide.
  88         if self.cover is not None:
  89             # cover_image = self.doc.meta.get(DCNS('relation.coverimage.url'))[0]
  90             cover = self.cover(self.doc)
  92             cover_output = cover.build()
  93             cover_name = 'cover.%s' % cover.format_ext
  94             zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
  97             cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
  98             cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
  99             zip.writestr('OPS/cover.html', etree.tostring(
 100                             cover_tree, method="html", pretty_print=True))
                 # Copy cover attribution from the metadata onto the document
                 # root element as data-* attributes.
 102             if cover.uses_dc_cover:
 103                 if self.doc.meta.get_one('cover_by'):
 104                     self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
 105                 if self.doc.meta.get_one('cover_source'):
 106                     self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))
 108             manifest.append(etree.fromstring(
 109                 '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 110             manifest.append(etree.fromstring(
 111                 '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
                 # The cover page goes first in the spine, marked non-linear.
 112             spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
 113             opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 114             guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
                 # NOTE(review): the condition guarding the next line is not
                 # visible; presumably a fresh Context is created only when the
                 # caller passed none -- TODO confirm.
 117             ctx = Context(format=self)
             # NOTE(review): ctx.toc, ctx.images and ctx.part_no are read later
             # but no visible line sets them; presumably done in elided lines.
 122         ctx.footnotes = Footnotes()
             # Render the document and write every non-empty part as
             # OPS/part<N>.html, registering it in manifest and spine.
 126         wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
 127         for e in self.render(self.doc.edoc.getroot(), ctx):
                 # Part has neither child elements nor non-blank text.
                 # NOTE(review): the body of this `if` is not visible;
                 # presumably the empty part is skipped.
 128             if not len(e) and not (e.text and e.text.strip()):
 130             wrap = deepcopy(wrap_tmpl)
 131             extend_element(wrap.find('//*[@id="book-text"]'), e)
                 # part_no is the attribute EpubRenderer.container() puts on
                 # each wrapper element.
 133             partstr = 'part%d' % int(e.get('part_no'))
                 # NOTE(review): the attrib dicts of the two calls below are only
                 # partly visible (remaining keys and the closers are elided).
 134             manifest.append(manifest.makeelement(OPFNS('item'), attrib={
 136                                  'href': partstr + ".html",
 137                                  'media-type': 'application/xhtml+xml',
 139             spine.append(spine.makeelement(OPFNS('itemref'), attrib={
 142             zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
             # Pull in every image URL collected on the context while rendering.
 144         for i, url in enumerate(ctx.images):
 145             add_file(url, 'image%s' % i)
             # Footnotes page, written only when at least one footnote exists.
 147         if len(ctx.footnotes.output):
 148             ctx.toc.add("Przypisy", "footnotes.html")
 149             manifest.append(etree.Element(
 150                 OPFNS('item'), id='footnotes', href='footnotes.html',
 151                 **{'media-type': "application/xhtml+xml"}))
                 # NOTE(review): this itemref is created without OPFNS, unlike
                 # the itemref elements appended to the spine elsewhere in
                 # build() -- confirm that is intended.
 152             spine.append(etree.Element('itemref', idref='footnotes'))
 153             wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
 154             extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
 156             # chars = chars.union(used_chars(html_tree.getroot()))
 157             zip.writestr('OPS/footnotes.html', etree.tostring(
 158                                 wrap, method="html", pretty_print=True))
             # Lines of the closing information page.
             # NOTE(review): the opening and closing of this sequence are not
             # visible; it is iterated as `footer_text` below.
 161             'Information about the resource',
 162             'Publisher: %s' % self.dc('publisher'),
 163             'Rights: %s' % self.dc('rights'),
 164             'Intended audience: %s' % self.dc('audience', multiple=True),
 165             self.dc('description'),
 166             'Resource prepared using MIL/PEER editing platform.',
 167             'Source available at %s' % ctx.source_url,
             # The footer page reuses the chapter template, one <p> per line.
 169         footer_wrap = deepcopy(wrap_tmpl)
 170         footer_body = footer_wrap.find('//*[@id="book-text"]')
 171         for line in footer_text:
 172             footer_line = etree.Element('p')
 173             footer_line.text = line
 174             footer_body.append(footer_line)
             # NOTE(review): attrib dicts below are again only partly visible.
 175         manifest.append(manifest.makeelement(OPFNS('item'), attrib={
 177             'href': "footer.html",
 178             'media-type': 'application/xhtml+xml',
 180         spine.append(spine.makeelement(OPFNS('itemref'), attrib={
 183         zip.writestr('OPS/footer.html', etree.tostring(footer_wrap, method='html'))
             # Package document and table of contents are written last, once
             # manifest, spine and TOC are complete.
 185         zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
             # toc_file[-1] is the last child of the NCX root (the navMap, per
             # the commented-out line above).
 186         ctx.toc.render(toc_file[-1])
 187         zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
             # NOTE(review): no zip.close() is visible before the return; the
             # archive is only finalized on close -- presumably done in an
             # elided line, TODO confirm.
 189         return OutputFile.from_filename(output_file.name)
 
 191     def render(self, element, ctx):
 
 192         return self.renderers.get_for(element).render(element, ctx)
 
 197 class EpubRenderer(TreeRenderer):
 
 198     """ Renders insides as XML in a <_/> container. """
 
 199     def container(self, ctx):
 
 200         root, inner = super(EpubRenderer, self).container()
 
 201         root.set("part_no", str(ctx.part_no))
 
 204     def render(self, element, ctx):
             # Render `element` and its children into one or more container
             # elements ("parts").
             # NOTE(review): several lines of this method (guards, try/else
             # headers and the statements that hand finished wrappers back) are
             # not visible here. Callers iterate over the result, so this is
             # presumably a generator yielding wrappers -- TODO confirm.
             # Children are rendered with the context from subcontext().
 205         subctx = self.subcontext(element, ctx)
 206         wrapper, inside = self.container(ctx)
                 # Leading text of the element (the guard is not visible).
 208             extend_element(inside, self.render_text(element.text, ctx))
 209         for child in element:
                     # Renderer lookup; the enclosing `try:` and the body of the
                     # UnknownElement handler are not visible.
 211                 child_renderer = ctx.format.renderers.get_for(child)
 212             except UnknownElement:
                     # Renderers with a truthy `epub_separate` attribute have
                     # their parts iterated on their own (loop body not visible),
                     # after which a fresh container is started.
 215                 if getattr(child_renderer, 'epub_separate', False):
 218                     for child_part in child_renderer.render(child, subctx):
 220                     wrapper, inside = self.container(ctx)
                         # Otherwise the first part is merged into the current
                         # container; when there are several parts, the middle
                         # ones are iterated (body not visible) and the last one
                         # goes into a fresh container.
 222                     child_parts = list(child_renderer.render(child, subctx))
 223                     extend_element(inside, child_parts[0])
 224                     if len(child_parts) > 1:
 226                         for child_part in child_parts[1:-1]:
 228                         wrapper, inside = self.container(ctx)
 229                         extend_element(inside, child_parts[-1])
                         # Text following the child (its tail) goes into whatever
                         # container is current at this point.
 232                     extend_element(inside, self.render_text(child.tail, ctx))
 
 236 class NaturalText(EpubRenderer):
         # Renderer whose text nodes are split into chunks with an `nbsp`
         # entity created for each split point.
 237     def render_text(self, text, ctx):
 238         root, inner = self.text_container()
             # Split at every space that follows a single word character which
             # itself follows whitespace, i.e. after a one-character word.
             # NOTE(review): the pattern is not a raw string; r'(?<=\s\w) '
             # would avoid relying on unrecognised escapes. `re` is used here
             # but its import is not among the visible import lines.
 239         chunks = re.split('(?<=\s\w) ', text)
 240         inner.text = chunks[0]
 241         for chunk in chunks[1:]:
 242             x = etree.Entity("nbsp")
                 # NOTE(review): the rest of the loop and the return are not
                 # visible; presumably the entity is appended to `inner` with
                 # the chunk as its tail and `root` is returned -- TODO confirm.
 
 248 class Silent(EpubRenderer):
         # Renderer registered below for `comment` asides and used as the base
         # of DivVideoR.
 249     def render_text(self, text, ctx):
 250         root, inner = self.text_container()
             # NOTE(review): the remainder is not visible. Nothing visible copies
             # `text` into the container, so presumably the empty container is
             # returned and text is dropped -- TODO confirm.
 
 254 class Footnotes(object):
         # Collects footnote bodies in `output`; build() writes that element to
         # footnotes.html when it has children.
         # NOTE(review): the initializer header and the set-up and updates of
         # `self.counter` are not visible in this view.
 257         self.output = etree.Element("_")
 259     def append(self, items):
             # Add one footnote made of the rendered `items` and build the
             # in-text anchor for it (AsideR.render inserts the returned value).
             # First a back-link: footnotes.html -> the part holding the anchor;
             # the part number is taken from the first item's part_no attribute.
             # NOTE(review): the line starting this call (binding `e`) is not
             # visible.
 262             "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter),
 263             id="footnote-%d" % self.counter,
 264             style="float:left;margin-right:1em")
 265         e.text = "[%d]" % self.counter
 267         self.output.append(e)
                 # Footnote content follows its back-link (loop header elided).
 269             extend_element(self.output, item)
             # Forward link placed in the running text, pointing at the footnote.
 270         anchor = etree.Element(
 271             "a", href="footnotes.html#footnote-%d" % self.counter, id="footnote-anchor-%d" % self.counter)
 272         anchor.text = "[%d]" % self.counter
             # NOTE(review): the return statement is not visible; presumably
             # `anchor` is returned -- TODO confirm.
 
 277     def __init__(self, title=None, href="", root=None):
             # One node of the table-of-contents tree.
             # NOTE(review): the lines setting self.root, self.children,
             # self.title and the root's counter are not visible here; add() and
             # render() rely on those attributes.
             # A "{counter}" placeholder in `href` is filled with the root's
             # current counter value.
 285         self.href = href.format(counter=self.root.counter)
             # Every node takes the next number from the counter kept on the
             # root; render() uses it for the navPoint id and playOrder.
 286         self.number = self.root.counter
 287         self.root.counter += 1
 
 289     def add(self, title, href):
 
 290         subtoc = type(self)(title, href, root=self.root)
 
 291         self.children.append(subtoc)
 
 294     def render(self, nav_map):
 
 295         for child in self.children:
 
 296             nav_point = etree.Element(NCXNS('navPoint'))
 
 297             nav_point.set('id', 'NavPoint-%d' % child.number)
 
 298             nav_point.set('playOrder', str(child.number))
 
 300             nav_label = etree.Element(NCXNS('navLabel'))
 
 301             text = etree.Element(NCXNS('text'))
 
 302             text.text = child.title
 
 303             nav_label.append(text)
 
 304             nav_point.append(nav_label)
 
 306             content = etree.Element(NCXNS('content'))
 
 307             content.set('src', child.href)
 
 308             nav_point.append(content)
 
 309             nav_map.append(nav_point)
 
 310             child.render(nav_point)
 
 315 class AsideR(NaturalText):
         # Renders an aside as a footnote: the content is moved to the footnote
         # collection and only an anchor stays in the text.
 316     def render(self, element, ctx):
             # Render the aside normally, then hand all produced parts to the
             # footnote collector, which gives back the in-text anchor.
 317         outputs = list(super(AsideR, self).render(element, ctx))
 318         anchor = ctx.footnotes.append(outputs)
 319         wrapper, inside = self.text_container()  # etree.Element('_', part_no=str(ctx.part_no))
 320         inside.append(anchor)
             # NOTE(review): the line producing the result is not visible;
             # callers iterate over render() results, so presumably `wrapper`
             # is yielded or returned in a list -- TODO confirm.
     # Default asides become footnotes; asides registered under 'comment' use
     # Silent. NOTE(review): the exact meaning of the second register()
     # argument is defined by Register, which is not visible here.
 322 EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
 324 EpubFormat.renderers.register(core.Aside, 'comment', Silent())
 
 327 class DivR(NaturalText):
         # Div renderer that adjusts its container when the context is inline.
 328     def container(self, ctx):
 329         root, inner = super(DivR, self).container(ctx)
             # `inline` is set on the context by HeaderR.subcontext().
 330         if getattr(ctx, 'inline', False):
                 # NOTE(review): one line of this branch is not visible.
 332             inner.set('style', 'display: block;')
             # NOTE(review): the return is not visible; callers unpack a
             # (root, inner) pair, so presumably that pair is returned.
     # Renderers for Div elements and list items, keyed by the second
     # register() argument; the string passed to each renderer ('div', 'p',
     # 'ul', ...) is presumably the output tag name -- TODO confirm against
     # TreeRenderer.
 334 EpubFormat.renderers.register(core.Div, None, DivR('div'))
 335 EpubFormat.renderers.register(core.Div, 'p', NaturalText('p'))
 337 EpubFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
 338 EpubFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
 339 EpubFormat.renderers.register(core.Div, 'item', NaturalText('li'))
 340 EpubFormat.renderers.register(core.Span, 'item', NaturalText('li'))
 
 343 class DivImageR(EpubRenderer):
         # Renders an image element and records its URL for bundling.
 344     def render(self, element, ctx):
 345         src = element.attrib.get('src', '')
             # The original URL is queued on the context; build() later passes
             # every entry of ctx.images to add_file().
 346         ctx.images.append(src)
                 # NOTE(review): the condition guarding this raise is not
                 # visible. Note the URL has already been appended to ctx.images
                 # by the time the error is raised.
 348             raise BuildError('Bad image URL')
             # Inside the book the image is referenced by its bare file name,
             # the same name add_file() derives when storing it under OPS/.
 349         src = src.rsplit('/', 1)[1]
             # The file name travels to container() through a derived context.
 350         return super(DivImageR, self).render(element, Context(ctx, src=src))
 352     def container(self, ctx):
 353         root, inner = super(DivImageR, self).container(ctx)
 354         src = getattr(ctx, 'src', '')
 355         inner.set('src', src)
 356         # inner.set('style', 'display: block; width: 60%; margin: 3em auto')
             # NOTE(review): the return is not visible; presumably
             # (root, inner) -- TODO confirm.
 358 EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
 
 361 class DivVideoR(Silent):
         # Renders a video element as a link to the video's URL.
 362     def render(self, element, ctx):
             # The URL comes from the provider's 'url' template in
             # VIDEO_PROVIDERS, filled with the element's videoid.
             # NOTE(review): a missing or unknown `provider` attribute raises
             # KeyError here.
 363         src = VIDEO_PROVIDERS[element.attrib.get('provider')]['url'] % element.attrib.get('videoid', '')
 364         return super(DivVideoR, self).render(element, Context(ctx, src=src))
 366     def container(self, ctx):
 367         root, inner = super(DivVideoR, self).container(ctx)
 368         src = getattr(ctx, 'src', '')
 369         link = etree.Element('a', {'href': src})
             # NOTE(review): the rest is not visible; presumably the link gets a
             # text, is appended to `inner`, and (root, inner) is returned --
             # TODO confirm.
 373 EpubFormat.renderers.register(core.Div, 'video', DivVideoR('p'))
 
 376 class HeaderR(NaturalText):
 
 377     def subcontext(self, element, ctx):
 
 378         return Context(ctx, inline=True)
 
 379 EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
 
 382 class SectionR(NaturalText):
 
 385     def render(self, element, ctx):
 
 387         if element.getparent() is not None:
 
 388             tocitem = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
 
 389             ctx = Context(ctx, toc=tocitem)
 
 390         return super(SectionR, self).render(element, ctx)
 
 391 EpubFormat.renderers.register(core.Section, None, SectionR())
 
 394 class SpanR(NaturalText):
         # Renderer for inline spans.
         # NOTE(review): the class body is not visible in this view; nothing
         # here shows it overriding NaturalText -- TODO confirm.
     # Plain spans map to <span>; 'cite' and 'emph' map to <i>, 'emp' to <b>.
 396 EpubFormat.renderers.register(core.Span, None, SpanR('span'))
 397 EpubFormat.renderers.register(core.Span, 'cite', SpanR('i'))
 398 EpubFormat.renderers.register(core.Span, 'emp', SpanR('b'))
 399 EpubFormat.renderers.register(core.Span, 'emph', SpanR('i'))
 
 402 class SpanLink(EpubRenderer):
         # Renders a link span as <a>, copying the element's href onto it.
 403     def render(self, element, ctx):
 404         parts = super(SpanLink, self).render(element, ctx)
                 # NOTE(review): the loop header over `parts` and the statement
                 # handing each part back are not visible.
 406             src = element.attrib.get('href', '')
                 # file:// links are rebased onto ctx.files_path.
                 # NOTE(review): unlike add_file() in EpubFormat.build, the path
                 # is not passed through urllib.quote here -- confirm whether
                 # that difference is intended.
 407             if src.startswith('file://'):
 408                 src = ctx.files_path + src[7:]
                 # part[0] is the first child of the part's wrapper element.
 409             part[0].attrib['href'] = src
 411 EpubFormat.renderers.register(core.Span, 'link', SpanLink('a'))