1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
9 from copy import deepcopy
10 from mimetypes import guess_type
11 from tempfile import NamedTemporaryFile
13 from urllib2 import urlopen
15 from lxml import etree
16 from librarian import OPFNS, NCXNS, XHTMLNS, DCNS, BuildError
17 from librarian import core
18 from librarian.formats import Format
19 from librarian.formats.cover.evens import EvensCover
20 from librarian.output import OutputFile
21 from librarian.renderers import Register, TreeRenderer, UnknownElement
22 from librarian.utils import Context, get_resource, extend_element
25 class EpubFormat(Format):
30 renderers = Register()
def __init__(self, doc, cover=None, with_fonts=True):
    """Prepare an EPUB builder for ``doc``.

    :param doc: document to convert; handed to ``Format.__init__``.
    :param cover: optional cover class.  ``build`` calls it as
        ``self.cover(self.doc)``, so when given it replaces whatever
        ``cover`` attribute the format already has.
    :param with_fonts: flag kept on the instance as ``with_fonts``.
    """
    super(EpubFormat, self).__init__(doc)
    self.with_fonts = with_fonts
    # The argument used to be accepted and then dropped.  Only override the
    # existing attribute when the caller actually supplied a cover.
    if cover is not None:
        self.cover = cover
def dc(self, tag, multiple=False):
    """Return the document metadata stored under ``DCNS(tag)``.

    :param tag: local tag name, qualified with ``DCNS`` before lookup.
    :param multiple: when true, all values are fetched with ``meta.get``
        and joined with ``', '``; otherwise ``meta.get_one`` is returned.
    :returns: the joined string, or the result of ``meta.get_one``.
    """
    if multiple:
        # ``or []`` mirrors how build() reads the creators, so a missing
        # field yields an empty string instead of a TypeError in join().
        return ', '.join(self.doc.meta.get(DCNS(tag)) or [])
    return self.doc.meta.get_one(DCNS(tag))
# Build the EPUB archive for self.doc and return it as an OutputFile created
# from the name of a NamedTemporaryFile (opened with delete=False).
# `ctx` is an optional rendering context supplied by the caller.
# NOTE(review): in this listing every line carries a stray leading number,
# indentation is lost and the numbering skips, i.e. some statements are not
# visible (ctx setup, dict/list literal lines, loop bodies). Confirm every
# remark below against the pristine file.
# NOTE(review): `os`, `urllib` and `zipfile` are used here but are not among
# the imports visible at the top of the file - presumably imported on lines
# that are not shown; verify.
44 def build(self, ctx=None):
# Nested helper: downloads `url` with urlopen, stores the bytes in the archive
# as OPS/<last path segment of url> and adds a matching <item> (id `file_id`,
# media type guessed from the URL) to the manifest.
46 def add_file(url, file_id):
47 filename = url.rsplit('/', 1)[1]
# file:// URLs are rewritten to ctx.files_path + the URL-quoted remainder.
48 if url.startswith('file://'):
49 url = ctx.files_path + urllib.quote(url[7:])
# URLs starting with '/' are prefixed with the http://milpeer.eu host.
50 if url.startswith('/'):
51 url = 'http://milpeer.eu' + url
52 file_content = urlopen(url).read()
53 zip.writestr(os.path.join('OPS', filename), file_content)
54 manifest.append(etree.fromstring(
55 '<item id="%s" href="%s" media-type="%s" />' % (file_id, filename, guess_type(url)[0])))
# Load the OPF template and look up the sections that are filled in below.
57 opf = etree.parse(get_resource('formats/epub/res/content.opf'))
58 manifest = opf.find(OPFNS('manifest'))
59 guide = opf.find(OPFNS('guide'))
60 spine = opf.find(OPFNS('spine'))
# Creator(s) joined with ", " (empty string when there are none) and the
# document title are written into the template's creator/title elements.
62 author = ", ". join(self.doc.meta.get(DCNS('creator')) or [])
63 title = self.doc.meta.title()
64 opf.find('.//' + DCNS('creator')).text = author
65 opf.find('.//' + DCNS('title')).text = title
# The archive is written straight into the temporary file.
67 output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
68 zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
# The 'mimetype' entry is written first and stored uncompressed (ZIP_STORED).
70 mime = zipfile.ZipInfo()
71 mime.filename = 'mimetype'
72 mime.compress_type = zipfile.ZIP_STORED
74 zip.writestr(mime, 'application/epub+zip')
# container.xml points readers at OPS/content.opf.
75 zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
76 'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
77 '<rootfiles><rootfile full-path="OPS/content.opf" '
78 'media-type="application/oebps-package+xml" />'
79 '</rootfiles></container>')
# Skeleton of the NCX table of contents; its last child is later handed to
# ctx.toc.render().
# NOTE(review): the end of this string literal / call is not visible here.
81 toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
82 '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
83 '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
84 'version="2005-1"><head></head><docTitle></docTitle><navMap>'
86 # nav_map = toc_file[-1]
# Cover: build the cover image, store it as OPS/cover.<ext>, write a
# cover.html page referencing it and register both in the OPF
# (manifest, spine, <meta name="cover"> and guide).
88 if self.cover is not None:
89 # cover_image = self.doc.meta.get(DCNS('relation.coverimage.url'))[0]
90 cover = self.cover(self.doc)
92 cover_output = cover.build()
93 cover_name = 'cover.%s' % cover.format_ext
94 zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
97 cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
98 cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
99 zip.writestr('OPS/cover.html', etree.tostring(
100 cover_tree, method="html", pretty_print=True))
# When the cover reports uses_dc_cover, the cover_by / cover_source metadata
# (if present) are copied onto the document root as data-* attributes.
102 if cover.uses_dc_cover:
103 if self.doc.meta.get_one('cover_by'):
104 self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
105 if self.doc.meta.get_one('cover_source'):
106 self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))
108 manifest.append(etree.fromstring(
109 '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
110 manifest.append(etree.fromstring(
111 '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
112 spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
113 opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
114 guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
# Rendering context.
# NOTE(review): ctx.toc, ctx.images, ctx.part_no and ctx.source_url are read
# further down but their initialisation is not visible; as shown, a
# caller-supplied ctx would also be replaced unconditionally - presumably a
# guard sits on a missing line. TODO confirm.
117 ctx = Context(format=self)
122 ctx.footnotes = Footnotes()
# Each rendered part is copied into a fresh copy of the chapter template
# (element with id "book-text"), listed in manifest and spine, and written
# as OPS/part<N>.html where N is the part's `part_no` attribute.
126 wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
127 for e in self.render(self.doc.edoc.getroot(), ctx):
# Condition is true for parts with no children and no non-blank text;
# presumably such parts are skipped - the body of this `if` is not visible.
128 if not len(e) and not (e.text and e.text.strip()):
130 wrap = deepcopy(wrap_tmpl)
131 extend_element(wrap.find('//*[@id="book-text"]'), e)
133 partstr = 'part%d' % int(e.get('part_no'))
134 manifest.append(manifest.makeelement(OPFNS('item'), attrib={
136 'href': partstr + ".html",
137 'media-type': 'application/xhtml+xml',
139 spine.append(spine.makeelement(OPFNS('itemref'), attrib={
142 zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
# Every URL gathered in ctx.images is fetched and packed as image<i>.
144 for i, url in enumerate(ctx.images):
145 add_file(url, 'image%s' % i)
# Footnotes page: only produced when the footnote collector holds output.
147 if len(ctx.footnotes.output):
148 ctx.toc.add("Przypisy", "footnotes.html")
149 manifest.append(etree.Element(
150 OPFNS('item'), id='footnotes', href='footnotes.html',
151 **{'media-type': "application/xhtml+xml"}))
# NOTE(review): this itemref is created with a plain 'itemref' tag, whereas
# the other spine entries in this method use OPFNS('itemref') - confirm
# whether the missing namespace is intended.
152 spine.append(etree.Element('itemref', idref='footnotes'))
153 wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
154 extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
156 # chars = chars.union(used_chars(html_tree.getroot()))
157 zip.writestr('OPS/footnotes.html', etree.tostring(
158 wrap, method="html", pretty_print=True))
# Lines of the closing information page; presumably the items of the
# `footer_text` list iterated below (its opening line is not visible).
161 'Information about the resource',
162 'Publisher: %s' % self.dc('publisher'),
163 'Rights: %s' % self.dc('rights'),
164 'Intended audience: %s' % self.dc('audience', multiple=True),
165 self.dc('description'),
166 'Resource prepared using MIL/PEER editing platform.',
167 'Source available at %s' % ctx.source_url,
# Each footer line becomes a <p> inside a copy of the chapter template,
# written as OPS/footer.html and listed in manifest and spine.
169 footer_wrap = deepcopy(wrap_tmpl)
170 footer_body = footer_wrap.find('//*[@id="book-text"]')
171 for line in footer_text:
172 footer_line = etree.Element('p')
173 footer_line.text = line
174 footer_body.append(footer_line)
175 manifest.append(manifest.makeelement(OPFNS('item'), attrib={
177 'href': "footer.html",
178 'media-type': 'application/xhtml+xml',
180 spine.append(spine.makeelement(OPFNS('itemref'), attrib={
183 zip.writestr('OPS/footer.html', etree.tostring(footer_wrap, method='html'))
# Finally serialise the completed OPF and the NCX table of contents.
185 zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
186 ctx.toc.render(toc_file[-1])
187 zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
# NOTE(review): no zip.close() is visible before the result is returned -
# presumably on a line not shown; verify, the archive must be finalised.
189 return OutputFile.from_filename(output_file.name)
def render(self, element, ctx):
    """Render ``element`` with the renderer registered for it.

    :param element: document element to render.
    :param ctx: rendering context passed through to the renderer.
    :returns: whatever the selected renderer's ``render`` returns.
    """
    renderer = self.renderers.get_for(element)
    return renderer.render(element, ctx)
197 class EpubRenderer(TreeRenderer):
198 """ Renders insides as XML in a <_/> container. """
def container(self, ctx):
    """Create an output container tagged with the current part number.

    :param ctx: rendering context; its ``part_no`` is written, as a string,
        to the ``part_no`` attribute of the container root.
    :returns: the ``(root, inner)`` pair obtained from
        ``TreeRenderer.container``.
    """
    root, inner = super(EpubRenderer, self).container()
    root.set("part_no", str(ctx.part_no))
    # Callers unpack the result (``wrapper, inside = self.container(ctx)``),
    # so the pair has to be handed back.
    return root, inner
# Render `element` and its children into one or more containers created by
# self.container(ctx). Children are rendered with the renderer registered
# for them on ctx.format, using the sub-context from self.subcontext().
# NOTE(review): this listing has lost its indentation and several lines (the
# embedded numbers skip): the `try:` matching the `except` below, the
# branches around the `epub_separate` test and whatever hands `wrapper` back
# to the caller are not visible. Callers iterate over the result
# (`list(...render(...))`, `for child_part in ...render(...)`), so presumably
# this is a generator yielding wrappers - TODO confirm.
204 def render(self, element, ctx):
205 subctx = self.subcontext(element, ctx)
206 wrapper, inside = self.container(ctx)
# Leading text of the element goes into the current container.
208 extend_element(inside, self.render_text(element.text, ctx))
209 for child in element:
211 child_renderer = ctx.format.renderers.get_for(child)
# Children without a registered renderer raise UnknownElement; the handler
# body is not visible here.
212 except UnknownElement:
# Renderers flagged `epub_separate` have their parts passed on one by one and
# a fresh container is started afterwards.
215 if getattr(child_renderer, 'epub_separate', False):
218 for child_part in child_renderer.render(child, subctx):
220 wrapper, inside = self.container(ctx)
# Otherwise the child's first part is merged into the current container; when
# the child produced several parts, its last part goes into a fresh container.
222 child_parts = list(child_renderer.render(child, subctx))
223 extend_element(inside, child_parts[0])
224 if len(child_parts) > 1:
226 for child_part in child_parts[1:-1]:
228 wrapper, inside = self.container(ctx)
229 extend_element(inside, child_parts[-1])
# Text following the child (its tail) is appended to the current container.
232 extend_element(inside, self.render_text(child.tail, ctx))
class NaturalText(EpubRenderer):
    """Renderer for running text that keeps one-letter words unbroken."""

    def render_text(self, text, ctx):
        """Wrap ``text`` in a text container, inserting non-breaking spaces.

        The space that follows a single word character which is itself
        preceded by whitespace (i.e. a one-letter word) is replaced with an
        ``&nbsp;`` entity, so the short word stays attached to the next one.

        :param text: text to render; ``None`` is treated as empty.
        :param ctx: rendering context (not used here).
        :returns: the root of the container from ``self.text_container()``.
        """
        root, inner = self.text_container()
        # Raw string: the pattern relies on regex escapes, not string escapes.
        chunks = re.split(r'(?<=\s\w) ', text or '')
        inner.text = chunks[0]
        for chunk in chunks[1:]:
            # The entity stands in for the removed space; the text that
            # followed it becomes the entity's tail.
            nbsp = etree.Entity("nbsp")
            nbsp.tail = chunk
            inner.append(nbsp)
        return root
class Silent(EpubRenderer):
    """Renderer that drops text content."""

    def render_text(self, text, ctx):
        """Return an empty text container, ignoring ``text``.

        :returns: the root of the container from ``self.text_container()``,
            so callers can pass it to ``extend_element`` like any other
            rendered text.
        """
        root, _inner = self.text_container()
        return root
# Collector for footnotes: rendered footnote bodies accumulate in
# `self.output`, which build() copies into footnotes.html.
# NOTE(review): this listing is incomplete (the embedded numbers skip): the
# `def __init__` line, the initialisation and incrementing of `self.counter`,
# the start of the first etree.Element( call, the loop header binding `item`
# and the return statement are not visible. Confirm against the full file.
254 class Footnotes(object):
# Accumulator element for all footnotes.
257 self.output = etree.Element("_")
# Register one footnote made of the rendered parts `items`.
# AsideR appends the value returned from here to its own output, so
# presumably the `anchor` element built at the end is returned - TODO confirm.
259 def append(self, items):
# Back-link placed in the footnotes page: it points at the anchor inside the
# part file named after the `part_no` attribute of the first item.
262 "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter),
263 id="footnote-%d" % self.counter,
264 style="float:left;margin-right:1em")
265 e.text = "[%d]" % self.counter
267 self.output.append(e)
# Content of an item is copied into the accumulator.
269 extend_element(self.output, item)
# In-text marker linking to the footnote in footnotes.html.
270 anchor = etree.Element(
271 "a", href="footnotes.html#footnote-%d" % self.counter, id="footnote-anchor-%d" % self.counter)
272 anchor.text = "[%d]" % self.counter
# Constructor of a table-of-contents node (the enclosing class header is not
# visible in this listing).
# NOTE(review): the lines between the signature and the statements below are
# missing here; `self.root`, `self.children` and `self.title` are used by the
# other methods, so presumably they are set up on those lines - TODO confirm.
277 def __init__(self, title=None, href="", root=None):
# `href` may contain a "{counter}" placeholder, filled with the root's
# current counter value.
285 self.href = href.format(counter=self.root.counter)
# Each node takes the current counter value as its number, then advances the
# counter shared through the root.
286 self.number = self.root.counter
287 self.root.counter += 1
def add(self, title, href):
    """Create a child entry, attach it to this node and return it.

    :param title: label of the new entry.
    :param href: target of the new entry.
    :returns: the new entry, an instance of the same class as ``self``
        sharing this node's ``root``.
    """
    subtoc = type(self)(title, href, root=self.root)
    self.children.append(subtoc)
    # SectionR stores the result as the ``toc`` of its nested context, so
    # the new entry must be handed back to the caller.
    return subtoc
def render(self, nav_map):
    """Append an NCX ``navPoint`` subtree for every child to ``nav_map``.

    Each nav point gets ``id``/``playOrder`` from the child's number, a
    ``navLabel`` holding the child's title and a ``content`` element pointing
    at the child's href; the child then renders its own children into it.

    :param nav_map: element that receives the generated nav points.
    """
    for entry in self.children:
        label_text = etree.Element(NCXNS('text'))
        label_text.text = entry.title
        label = etree.Element(NCXNS('navLabel'))
        label.append(label_text)

        target = etree.Element(NCXNS('content'))
        target.set('src', entry.href)

        point = etree.Element(NCXNS('navPoint'))
        point.set('id', 'NavPoint-%d' % entry.number)
        point.set('playOrder', str(entry.number))
        point.append(label)
        point.append(target)

        nav_map.append(point)
        # Nested entries go below the nav point that was just attached.
        entry.render(point)
class AsideR(NaturalText):
    """Renderer turning an aside into a footnote plus an in-text marker."""

    def render(self, element, ctx):
        """Move the rendered aside to the footnotes and emit its marker.

        The aside is rendered normally, the resulting parts are handed to
        ``ctx.footnotes.append`` and the value it returns is placed in a
        fresh text container.

        :returns: generator producing that single container.
        """
        outputs = list(super(AsideR, self).render(element, ctx))
        anchor = ctx.footnotes.append(outputs)
        # Alternative once considered: etree.Element('_', part_no=str(ctx.part_no))
        wrapper, inside = self.text_container()
        inside.append(anchor)
        # Results of render() are consumed by iteration (see
        # EpubRenderer.render), so the wrapper is produced as a
        # one-element stream rather than dropped.
        yield wrapper


EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
EpubFormat.renderers.register(core.Aside, 'comment', Silent())
# Renderer for generic Div elements; adjusts its container when the context
# is flagged `inline` (HeaderR sets that flag for its children).
# NOTE(review): the embedded numbering skips between the `if` and the
# `inner.set(...)` line and after it, so a statement inside the branch and
# the method's return are not visible. Callers unpack the result of
# container(), so presumably `root, inner` is returned - TODO confirm.
327 class DivR(NaturalText):
328 def container(self, ctx):
329 root, inner = super(DivR, self).container(ctx)
330 if getattr(ctx, 'inline', False):
332 inner.set('style', 'display: block;')
# Renderer registrations for Div (and list-item Span) variants: each maps an
# element class plus a type key to a renderer instance created with an
# HTML tag name.
334 EpubFormat.renderers.register(core.Div, None, DivR('div'))
335 EpubFormat.renderers.register(core.Div, 'p', NaturalText('p'))
337 EpubFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
338 EpubFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
339 EpubFormat.renderers.register(core.Div, 'item', NaturalText('li'))
340 EpubFormat.renderers.register(core.Span, 'item', NaturalText('li'))
class DivImageR(EpubRenderer):
    """Renderer for images: queues the file for packing and emits ``<img>``."""

    def render(self, element, ctx):
        """Queue the image URL for download and render the element.

        The URL from the ``src`` attribute is added to ``ctx.images`` and the
        element is rendered in a context whose ``src`` is the last path
        segment of that URL, matching the file name the image is packed under.

        :raises BuildError: when the URL contains no ``/`` and therefore has
            no path segment to use as a file name.
        """
        src = element.attrib.get('src', '')
        # Validate before queueing, so a rejected URL is never left in
        # ctx.images.  Without the check the rsplit below would fail with
        # an IndexError.
        if '/' not in src:
            raise BuildError('Bad image URL')
        ctx.images.append(src)
        src = src.rsplit('/', 1)[1]
        return super(DivImageR, self).render(element, Context(ctx, src=src))

    def container(self, ctx):
        """Return the usual container with ``src`` set on the inner element."""
        root, inner = super(DivImageR, self).container(ctx)
        src = getattr(ctx, 'src', '')
        inner.set('src', src)
        # inner.set('style', 'display: block; width: 60%; margin: 3em auto')
        # Callers unpack the pair, so it has to be returned.
        return root, inner


EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
# Renderer for video elements: builds a YouTube watch URL from the element's
# `videoid` attribute and creates a link to it.
# NOTE(review): the end of container() is not visible in this listing (the
# embedded numbering skips after the `link = ...` line); presumably the link
# is attached to `inner` and `root, inner` is returned - TODO confirm.
361 class DivVideoR(Silent):
# Renders the element in a context carrying the watch URL as `src`.
362 def render(self, element, ctx):
363 src = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid', '')
364 return super(DivVideoR, self).render(element, Context(ctx, src=src))
# Builds an <a> element whose href is the context's `src` (empty string when
# the context has none).
366 def container(self, ctx):
367 root, inner = super(DivVideoR, self).container(ctx)
368 src = getattr(ctx, 'src', '')
369 link = etree.Element('a', {'href': src})
# Video divs are registered with a renderer created for the 'p' tag.
373 EpubFormat.renderers.register(core.Div, 'video', DivVideoR('p'))
class HeaderR(NaturalText):
    """Header renderer whose children are rendered in an inline context."""

    def subcontext(self, element, ctx):
        """Return a context derived from ``ctx`` with ``inline`` set to True."""
        inline_ctx = Context(ctx, inline=True)
        return inline_ctx


EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
# Renderer for sections; records a table-of-contents entry for nested ones.
# NOTE(review): the embedded numbering skips after the class line and after
# the `def` line, so class-level statements are not visible here.
# EpubRenderer.render checks renderers for an `epub_separate` flag that no
# visible line sets - presumably it is defined on this class; TODO confirm.
382 class SectionR(NaturalText):
385 def render(self, element, ctx):
# Only an element that has a parent gets a TOC entry: titled with
# element.meta.title() and pointing at the file of the current part number.
387 if element.getparent() is not None:
388 tocitem = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
# The new entry becomes `toc` of a derived context (presumably inside the
# branch above, since `tocitem` is only bound there).
389 ctx = Context(ctx, toc=tocitem)
390 return super(SectionR, self).render(element, ctx)
# Registered without a tag argument, unlike the other renderers in this file.
391 EpubFormat.renderers.register(core.Section, None, SectionR())
class SpanR(NaturalText):
    """Inline renderer; adds nothing to NaturalText beyond a distinct type."""


EpubFormat.renderers.register(core.Span, None, SpanR('span'))
EpubFormat.renderers.register(core.Span, 'cite', SpanR('i'))
EpubFormat.renderers.register(core.Span, 'emp', SpanR('b'))
EpubFormat.renderers.register(core.Span, 'emph', SpanR('i'))
class SpanLink(EpubRenderer):
    """Renderer for links: copies the element's ``href`` onto its output."""

    def render(self, element, ctx):
        """Render the link and set ``href`` on the first child of each part.

        ``file://`` targets are rewritten to ``ctx.files_path`` followed by
        the remainder of the URL; every other target is used unchanged.

        :returns: generator over the rendered parts.
        """
        parts = super(SpanLink, self).render(element, ctx)
        # Walk the rendered parts explicitly: ``part`` was referenced without
        # ever being bound, and the parts have to reach the caller.
        for part in parts:
            src = element.attrib.get('href', '')
            if src.startswith('file://'):
                src = ctx.files_path + src[7:]
            part[0].attrib['href'] = src
            yield part


EpubFormat.renderers.register(core.Span, 'link', SpanLink('a'))