1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
9 from copy import deepcopy
10 from mimetypes import guess_type
11 from tempfile import NamedTemporaryFile
13 from urllib2 import urlopen
15 from lxml import etree
16 from librarian import OPFNS, NCXNS, XHTMLNS, DCNS, BuildError
17 from librarian import core
18 from librarian.formats import Format
19 from librarian.formats.cover.evens import EvensCover
20 from librarian.output import OutputFile
21 from librarian.renderers import Register, TreeRenderer, UnknownElement
22 from librarian.utils import Context, get_resource, extend_element
25 class EpubFormat(Format):
30 renderers = Register()
32 def __init__(self, doc, cover=None, with_fonts=True):
33 super(EpubFormat, self).__init__(doc)
34 self.with_fonts = with_fonts
39 return self.doc.meta.get_one(DCNS(tag))
41 def build(self, ctx=None):
43 def add_file(url, file_id):
44 filename = url.rsplit('/', 1)[1]
45 if url.startswith('file://'):
46 url = ctx.files_path + urllib.quote(url[7:])
47 if url.startswith('/'):
48 url = 'http://milpeer.eu' + url
49 file_content = urlopen(url).read()
50 zip.writestr(os.path.join('OPS', filename), file_content)
51 manifest.append(etree.fromstring(
52 '<item id="%s" href="%s" media-type="%s" />' % (file_id, filename, guess_type(url)[0])))
54 opf = etree.parse(get_resource('formats/epub/res/content.opf'))
55 manifest = opf.find(OPFNS('manifest'))
56 guide = opf.find(OPFNS('guide'))
57 spine = opf.find(OPFNS('spine'))
59 author = ", ". join(self.doc.meta.get(DCNS('creator')) or [])
60 title = self.doc.meta.title()
61 opf.find('.//' + DCNS('creator')).text = author
62 opf.find('.//' + DCNS('title')).text = title
64 output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
65 zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
67 mime = zipfile.ZipInfo()
68 mime.filename = 'mimetype'
69 mime.compress_type = zipfile.ZIP_STORED
71 zip.writestr(mime, 'application/epub+zip')
72 zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
73 'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
74 '<rootfiles><rootfile full-path="OPS/content.opf" '
75 'media-type="application/oebps-package+xml" />'
76 '</rootfiles></container>')
78 toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
79 '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
80 '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
81 'version="2005-1"><head></head><docTitle></docTitle><navMap>'
83 # nav_map = toc_file[-1]
85 if self.cover is not None:
86 # cover_image = self.doc.meta.get(DCNS('relation.coverimage.url'))[0]
87 cover = self.cover(self.doc)
89 cover_output = cover.build()
90 cover_name = 'cover.%s' % cover.format_ext
91 zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
94 cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
95 cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
96 zip.writestr('OPS/cover.html', etree.tostring(
97 cover_tree, method="html", pretty_print=True))
99 if cover.uses_dc_cover:
100 if self.doc.meta.get_one('cover_by'):
101 self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
102 if self.doc.meta.get_one('cover_source'):
103 self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))
105 manifest.append(etree.fromstring(
106 '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
107 manifest.append(etree.fromstring(
108 '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
109 spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
110 opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
111 guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
114 ctx = Context(format=self)
119 ctx.footnotes = Footnotes()
123 wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
124 for e in self.render(self.doc.edoc.getroot(), ctx):
125 if not len(e) and not (e.text and e.text.strip()):
127 wrap = deepcopy(wrap_tmpl)
128 extend_element(wrap.find('//*[@id="book-text"]'), e)
130 partstr = 'part%d' % int(e.get('part_no'))
131 manifest.append(manifest.makeelement(OPFNS('item'), attrib={
133 'href': partstr + ".html",
134 'media-type': 'application/xhtml+xml',
136 spine.append(spine.makeelement(OPFNS('itemref'), attrib={
139 zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))
141 for i, url in enumerate(ctx.images):
142 add_file(url, 'image%s' % i)
144 if len(ctx.footnotes.output):
145 ctx.toc.add("Przypisy", "footnotes.html")
146 manifest.append(etree.Element(
147 OPFNS('item'), id='footnotes', href='footnotes.html',
148 **{'media-type': "application/xhtml+xml"}))
149 spine.append(etree.Element('itemref', idref='footnotes'))
150 wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
151 extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
153 # chars = chars.union(used_chars(html_tree.getroot()))
154 zip.writestr('OPS/footnotes.html', etree.tostring(
155 wrap, method="html", pretty_print=True))
158 'Information about the resource',
159 'Publisher: %s' % self.dc('publisher'),
160 'Rights: %s' % self.dc('rights'),
161 'Intended audience: %s' % self.dc('audience'),
162 self.dc('description'),
163 'Resource prepared using MIL/PEER editing platform.',
164 'Source available at %s' % ctx.source_url,
166 footer_wrap = deepcopy(wrap_tmpl)
167 footer_body = footer_wrap.find('//*[@id="book-text"]')
168 for line in footer_text:
169 footer_line = etree.Element('p')
170 footer_line.text = line
171 footer_body.append(footer_line)
172 manifest.append(manifest.makeelement(OPFNS('item'), attrib={
174 'href': "footer.html",
175 'media-type': 'application/xhtml+xml',
177 spine.append(spine.makeelement(OPFNS('itemref'), attrib={
180 zip.writestr('OPS/footer.html', etree.tostring(footer_wrap, method='html'))
182 zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
183 ctx.toc.render(toc_file[-1])
184 zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
186 return OutputFile.from_filename(output_file.name)
def render(self, element, ctx):
    """Render ``element`` using the renderer registered for it.

    The renderer is looked up with ``self.renderers.get_for(element)`` and
    its ``render(element, ctx)`` result is returned unchanged.

    :param element: document element to render
    :param ctx: rendering context, handed to the renderer untouched
    :return: whatever the selected renderer's ``render`` returns
    """
    chosen_renderer = self.renderers.get_for(element)
    rendered = chosen_renderer.render(element, ctx)
    return rendered
194 class EpubRenderer(TreeRenderer):
195 """ Renders insides as XML in a <_/> container. """
196 def container(self, ctx):
197 root, inner = super(EpubRenderer, self).container()
198 root.set("part_no", str(ctx.part_no))
201 def render(self, element, ctx):
202 subctx = self.subcontext(element, ctx)
203 wrapper, inside = self.container(ctx)
205 extend_element(inside, self.render_text(element.text, ctx))
206 for child in element:
208 child_renderer = ctx.format.renderers.get_for(child)
209 except UnknownElement:
212 if getattr(child_renderer, 'epub_separate', False):
215 for child_part in child_renderer.render(child, subctx):
217 wrapper, inside = self.container(ctx)
219 child_parts = list(child_renderer.render(child, subctx))
220 extend_element(inside, child_parts[0])
221 if len(child_parts) > 1:
223 for child_part in child_parts[1:-1]:
225 wrapper, inside = self.container(ctx)
226 extend_element(inside, child_parts[-1])
229 extend_element(inside, self.render_text(child.tail, ctx))
233 class NaturalText(EpubRenderer):
234 def render_text(self, text, ctx):
235 root, inner = self.text_container()
236 chunks = re.split('(?<=\s\w) ', text)
237 inner.text = chunks[0]
238 for chunk in chunks[1:]:
239 x = etree.Entity("nbsp")
245 class Silent(EpubRenderer):
246 def render_text(self, text, ctx):
247 root, inner = self.text_container()
251 class Footnotes(object):
254 self.output = etree.Element("_")
256 def append(self, items):
259 "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), self.counter),
260 id="footnote-%d" % self.counter,
261 style="float:left;margin-right:1em")
262 e.text = "[%d]" % self.counter
264 self.output.append(e)
266 extend_element(self.output, item)
267 anchor = etree.Element(
268 "a", href="footnotes.html#footnote-%d" % self.counter, id="footnote-anchor-%d" % self.counter)
269 anchor.text = "[%d]" % self.counter
274 def __init__(self, title=None, href="", root=None):
282 self.href = href.format(counter=self.root.counter)
283 self.number = self.root.counter
284 self.root.counter += 1
286 def add(self, title, href):
287 subtoc = type(self)(title, href, root=self.root)
288 self.children.append(subtoc)
def render(self, nav_map):
    """Append an NCX ``navPoint`` for every child entry to ``nav_map``.

    Each ``navPoint`` gets ``id`` and ``playOrder`` attributes derived from
    the child's ``number``, a ``navLabel``/``text`` pair holding the child's
    ``title`` and a ``content`` element whose ``src`` is the child's
    ``href``.  The child is then asked to render its own children into the
    freshly created ``navPoint``, so nesting mirrors the TOC tree.

    :param nav_map: parent element that receives the new ``navPoint`` nodes
    """
    for entry in self.children:
        number = entry.number

        point = etree.Element(NCXNS('navPoint'))
        point.set('id', 'NavPoint-%d' % number)
        point.set('playOrder', str(number))

        # Label: <navLabel><text>title</text></navLabel>
        caption = etree.Element(NCXNS('text'))
        caption.text = entry.title
        label = etree.Element(NCXNS('navLabel'))
        label.append(caption)
        point.append(label)

        # Link target of this entry.
        target = etree.Element(NCXNS('content'))
        target.set('src', entry.href)
        point.append(target)

        # Attach first, then let the entry add its sub-entries below it.
        nav_map.append(point)
        entry.render(point)
312 class AsideR(NaturalText):
313 def render(self, element, ctx):
314 outputs = list(super(AsideR, self).render(element, ctx))
315 anchor = ctx.footnotes.append(outputs)
316 wrapper, inside = self.text_container() # etree.Element('_', part_no=str(ctx.part_no))
317 inside.append(anchor)
319 EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
321 EpubFormat.renderers.register(core.Aside, 'comment', Silent())
324 class DivR(NaturalText):
325 def container(self, ctx):
326 root, inner = super(DivR, self).container(ctx)
327 if getattr(ctx, 'inline', False):
329 inner.set('style', 'display: block;')
331 EpubFormat.renderers.register(core.Div, None, DivR('div'))
332 EpubFormat.renderers.register(core.Div, 'p', NaturalText('p'))
334 EpubFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
335 EpubFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
336 EpubFormat.renderers.register(core.Div, 'item', NaturalText('li'))
337 EpubFormat.renderers.register(core.Span, 'item', NaturalText('li'))
340 class DivImageR(EpubRenderer):
341 def render(self, element, ctx):
342 src = element.attrib.get('src', '')
343 ctx.images.append(src)
345 raise BuildError('Bad image URL')
346 src = src.rsplit('/', 1)[1]
347 return super(DivImageR, self).render(element, Context(ctx, src=src))
349 def container(self, ctx):
350 root, inner = super(DivImageR, self).container(ctx)
351 src = getattr(ctx, 'src', '')
352 inner.set('src', src)
353 # inner.set('style', 'display: block; width: 60%; margin: 3em auto')
355 EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
358 class DivVideoR(Silent):
359 def render(self, element, ctx):
360 src = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid', '')
361 return super(DivVideoR, self).render(element, Context(ctx, src=src))
363 def container(self, ctx):
364 root, inner = super(DivVideoR, self).container(ctx)
365 src = getattr(ctx, 'src', '')
366 link = etree.Element('a', {'href': src})
370 EpubFormat.renderers.register(core.Div, 'video', DivVideoR('p'))
class HeaderR(NaturalText):
    """Text renderer for headers whose children are rendered inline."""

    def subcontext(self, element, ctx):
        """Return a context derived from ``ctx`` with ``inline`` set to True.

        ``element`` is accepted for interface compatibility and is not
        inspected here.
        """
        inline_ctx = Context(ctx, inline=True)
        return inline_ctx
376 EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
379 class SectionR(NaturalText):
382 def render(self, element, ctx):
384 if element.getparent() is not None:
385 tocitem = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
386 ctx = Context(ctx, toc=tocitem)
387 return super(SectionR, self).render(element, ctx)
388 EpubFormat.renderers.register(core.Section, None, SectionR())
391 class SpanR(NaturalText):
393 EpubFormat.renderers.register(core.Span, None, SpanR('span'))
394 EpubFormat.renderers.register(core.Span, 'cite', SpanR('i'))
395 EpubFormat.renderers.register(core.Span, 'emp', SpanR('b'))
396 EpubFormat.renderers.register(core.Span, 'emph', SpanR('i'))
399 class SpanLink(EpubRenderer):
400 def render(self, element, ctx):
401 parts = super(SpanLink, self).render(element, ctx)
403 src = element.attrib.get('href', '')
404 if src.startswith('file://'):
405 src = ctx.files_path + src[7:]
406 part[0].attrib['href'] = src
408 EpubFormat.renderers.register(core.Span, 'link', SpanLink('a'))