bf21a6f70f873fe316010861d482955ed95dd75a
[librarian.git] / librarian / formats / epub / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import os
7 import urllib
8 from copy import deepcopy
9 from mimetypes import guess_type
10 from tempfile import NamedTemporaryFile
11 import zipfile
12 from urllib2 import urlopen
13
14 from lxml import etree
15 from librarian import OPFNS, NCXNS, XHTMLNS, DCNS
16 from librarian import core
17 from librarian.formats import Format
18 from librarian.formats.cover.evens import EvensCover
19 from librarian.output import OutputFile
20 from librarian.renderers import Register, TreeRenderer, UnknownElement
21 from librarian.utils import Context, get_resource, extend_element
22
23
class EpubFormat(Format):
    """Output format that packs a document into an EPUB archive.

    The archive holds the ``mimetype`` marker, ``META-INF/container.xml``,
    and an ``OPS/`` directory with the OPF package file, the NCX table of
    contents, an optional cover, one XHTML file per rendered part, the
    images collected during rendering and an optional footnotes page.
    """
    format_name = 'EPUB'
    format_ext = 'epub'

    # Cover class instantiated with the document; None disables the cover.
    cover = EvensCover
    # Registry mapping document elements to their EPUB renderers.
    renderers = Register()

    def __init__(self, doc, cover=None, with_fonts=True):
        """Store the document and the optional overrides.

        :param doc: document handed on to ``Format.__init__``.
        :param cover: cover class replacing the class-level default;
            ``None`` keeps the default.
        :param with_fonts: stored on the instance; not read anywhere in
            this class.
        """
        super(EpubFormat, self).__init__(doc)
        self.with_fonts = with_fonts
        if cover is not None:
            self.cover = cover

    def build(self, ctx=None):
        """Render the document and write it to a temporary ``.epub`` file.

        :param ctx: rendering context.  When falsy a fresh
            ``Context(format=self)`` is created, otherwise ``ctx.format``
            is pointed at this format.  Its ``toc``, ``toc_level``,
            ``footnotes``, ``images`` and ``part_no`` attributes are reset.
        :returns: ``OutputFile.from_filename`` for the temporary file,
            which is created with ``delete=False`` and so is not removed
            here.
        """

        def add_file(url, file_id):
            # Fetch one collected resource, store it under OPS/ using the
            # last path segment as its name, and list it in the manifest.
            # [-1] keeps a URL without any '/' from raising IndexError.
            filename = url.rsplit('/', 1)[-1]
            if url.startswith('file://'):
                url = ctx.files_path + urllib.quote(url[7:])
            if url.startswith('/'):
                # Site-absolute paths are fetched from this fixed host.
                url = 'http://milpeer.eu' + url
            response = urlopen(url)
            try:
                file_content = response.read()
            finally:
                response.close()
            epub_zip.writestr(os.path.join('OPS', filename), file_content)
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (file_id, filename, guess_type(url)[0])))

        opf = etree.parse(get_resource('formats/epub/res/content.opf'))
        manifest = opf.find(OPFNS('manifest'))
        guide = opf.find(OPFNS('guide'))
        spine = opf.find(OPFNS('spine'))

        author = ", ". join(self.doc.meta.get(DCNS('creator')) or '')
        title = self.doc.meta.title()
        opf.find('.//' + DCNS('creator')).text = author
        opf.find('.//' + DCNS('title')).text = title

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
        epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
        try:
            # The mimetype marker is the first entry and is stored
            # uncompressed, unlike the rest of the archive.
            mime = zipfile.ZipInfo()
            mime.filename = 'mimetype'
            mime.compress_type = zipfile.ZIP_STORED
            mime.extra = ''
            epub_zip.writestr(mime, 'application/epub+zip')
            epub_zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
                              'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
                              '<rootfiles><rootfile full-path="OPS/content.opf" '
                              'media-type="application/oebps-package+xml" />'
                              '</rootfiles></container>')

            toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
                                        '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
                                        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
                                        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
                                        '</navMap></ncx>')

            if self.cover is not None:
                cover = self.cover(self.doc)
                # NOTE(review): ctx is only defaulted further down, so a
                # caller that passes no context hands None to set_images();
                # confirm that set_images() accepts that.
                cover.set_images(ctx)
                cover_output = cover.build()
                cover_name = 'cover.%s' % cover.format_ext
                epub_zip.writestr(os.path.join('OPS', cover_name), cover_output.get_string())
                del cover_output

                cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
                cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
                epub_zip.writestr('OPS/cover.html', etree.tostring(
                    cover_tree, method="html", pretty_print=True))

                if cover.uses_dc_cover:
                    # Copy cover credits onto the document root as data attributes.
                    if self.doc.meta.get_one('cover_by'):
                        self.doc.edoc.getroot().set('data-cover-by', self.doc.meta.get_one('cover_by'))
                    if self.doc.meta.get_one('cover_source'):
                        self.doc.edoc.getroot().set('data-cover-source', self.doc.meta.get_one('cover_source'))

                manifest.append(etree.fromstring(
                    '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
                manifest.append(etree.fromstring(
                    '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
                spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
                opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
                guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

            if not ctx:
                ctx = Context(format=self)
            else:
                ctx.format = self
            ctx.toc = TOC()
            ctx.toc_level = 0
            ctx.footnotes = Footnotes()
            ctx.images = []
            ctx.part_no = 0

            wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
            for e in self.render(self.doc.edoc.getroot(), ctx):
                # Skip parts that have neither child elements nor visible text.
                if not len(e) and not (e.text and e.text.strip()):
                    continue
                wrap = deepcopy(wrap_tmpl)
                extend_element(wrap.find('//*[@id="book-text"]'), e)

                partstr = 'part%d' % int(e.get('part_no'))
                manifest.append(manifest.makeelement(OPFNS('item'), attrib={
                    'id': partstr,
                    'href': partstr + ".html",
                    'media-type': 'application/xhtml+xml',
                }))
                spine.append(spine.makeelement(OPFNS('itemref'), attrib={
                    'idref': partstr,
                }))
                epub_zip.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))

            for i, url in enumerate(ctx.images):
                add_file(url, 'image%s' % i)

            if len(ctx.footnotes.output):
                ctx.toc.add("Przypisy", "footnotes.html")
                manifest.append(etree.Element(
                    OPFNS('item'), id='footnotes', href='footnotes.html',
                    **{'media-type': "application/xhtml+xml"}))
                spine.append(etree.Element('itemref', idref='footnotes'))
                wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
                extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)

                epub_zip.writestr('OPS/footnotes.html', etree.tostring(
                    wrap, method="html", pretty_print=True))

            epub_zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
            # The last child of <ncx> is the <navMap> created above.
            ctx.toc.render(toc_file[-1])
            epub_zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
        finally:
            # ZipFile.close() leaves a file object it was handed open, so
            # the temporary file handle has to be closed separately.
            try:
                epub_zip.close()
            finally:
                output_file.close()
        return OutputFile.from_filename(output_file.name)

    def render(self, element, ctx):
        """Render ``element`` with the renderer registered for it."""
        return self.renderers.get_for(element).render(element, ctx)
161
162
163 # Helpers
164
class EpubRenderer(TreeRenderer):
    """Tree renderer whose output is a series of containers tagged with ``part_no``."""

    def container(self, ctx):
        """Return the parent's container pair, stamping the outer element with ``ctx.part_no``."""
        outer, content = super(EpubRenderer, self).container()
        outer.set("part_no", str(ctx.part_no))
        return outer, content

    def render(self, element, ctx):
        """Yield one or more containers holding the rendered ``element``.

        A new container is started whenever a child either is marked
        ``epub_separate`` or itself renders to more than one part.
        Children without a registered renderer are skipped, but their
        tail text is still rendered.
        """
        child_ctx = self.subcontext(element, ctx)
        current, body = self.container(ctx)
        if element.text:
            extend_element(body, self.render_text(element.text, ctx))
        for node in element:
            try:
                renderer = ctx.format.renderers.get_for(node)
            except UnknownElement:
                continue
            else:
                if not getattr(renderer, 'epub_separate', False):
                    parts = list(renderer.render(node, child_ctx))
                    # The first part continues the container in progress.
                    extend_element(body, parts[0])
                    if len(parts) > 1:
                        yield current
                        for middle in parts[1:-1]:
                            yield middle
                        # The last part opens the container that follows.
                        current, body = self.container(ctx)
                        extend_element(body, parts[-1])
                else:
                    # Close the container in progress, emit the child's
                    # parts as they come, then start afresh.
                    yield current
                    ctx.part_no += 1
                    for part in renderer.render(node, child_ctx):
                        yield part
                    current, body = self.container(ctx)
            finally:
                # Tail text goes into whichever container is current now.
                if node.tail:
                    extend_element(body, self.render_text(node.tail, ctx))
        yield current
202
203
class Footnotes(object):
    """Accumulates footnote bodies and hands out numbered anchors for them."""

    def __init__(self):
        # Number of the most recently added footnote.
        self.counter = 0
        # Container element collecting every footnote's back-link and body.
        self.output = etree.Element("_")

    def append(self, items):
        """Add a footnote made of ``items`` and return its in-text anchor.

        The back-link placed before the footnote body points at the part
        named by the ``part_no`` attribute of the first item.
        """
        self.counter += 1
        number = self.counter
        label = "[%d]" % number

        backlink = etree.Element(
            "a", href="part%d.html#footnote-anchor-%d" % (int(items[0].get('part_no')), number),
            id="footnote-%d" % number,
            style="float:left;margin-right:1em")
        backlink.text = label
        backlink.tail = " "
        self.output.append(backlink)

        for piece in items:
            extend_element(self.output, piece)

        in_text = etree.Element(
            "a", href="footnotes.html#footnote-%d" % number, id="footnote-anchor-%d" % number)
        in_text.text = label
        return in_text
224
225
class TOC(object):
    """Node of a table-of-contents tree; the root node owns the shared counter."""

    def __init__(self, title=None, href="", root=None):
        """Create a node and take the next number from the root's counter.

        ``href`` may contain a ``{counter}`` placeholder, which is filled
        with the counter value before this node is counted.
        """
        if root is not None:
            self.root = root
        else:
            # A node created without a root becomes the root itself.
            self.counter = 0
            self.root = self
        owner = self.root
        self.children = []
        self.title = title
        self.href = href.format(counter=owner.counter)
        self.number = owner.counter
        owner.counter += 1

    def add(self, title, href):
        """Append a child node sharing this tree's root and return it."""
        node_class = type(self)
        child = node_class(title, href, root=self.root)
        self.children.append(child)
        return child

    def render(self, nav_map):
        """Append a ``navPoint`` subtree for every child, recursively, to ``nav_map``."""
        for entry in self.children:
            point = etree.Element(NCXNS('navPoint'))
            point.set('id', 'NavPoint-%d' % entry.number)
            point.set('playOrder', str(entry.number))

            label = etree.Element(NCXNS('navLabel'))
            caption = etree.Element(NCXNS('text'))
            caption.text = entry.title
            label.append(caption)
            point.append(label)

            target = etree.Element(NCXNS('content'))
            target.set('src', entry.href)
            point.append(target)

            nav_map.append(point)
            # Nested entries are attached below their parent's navPoint.
            entry.render(point)
261
262
263 # Renderers
264
class AsideR(EpubRenderer):
    """Renders an aside as a footnote, leaving only its anchor in the text."""

    def render(self, element, ctx):
        """Move the rendered aside into ``ctx.footnotes`` and yield a container with the anchor."""
        rendered = list(super(AsideR, self).render(element, ctx))
        link = ctx.footnotes.append(rendered)
        holder, content = self.text_container()
        content.append(link)
        yield holder
EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
273
274
class DivR(EpubRenderer):
    """Renders a div; in an inline context it becomes a block-styled span."""

    def container(self, ctx):
        """Return the container pair, retagging the inner element when ``ctx.inline`` is set."""
        outer, content = super(DivR, self).container(ctx)
        if not getattr(ctx, 'inline', False):
            return outer, content
        content.tag = 'span'
        content.set('style', 'display: block;')
        return outer, content
EpubFormat.renderers.register(core.Div, None, DivR('div'))
283
284
class DivImageR(EpubRenderer):
    """Renders an image div as ``img`` and queues its source for packaging."""

    def render(self, element, ctx):
        """Record the image source in ``ctx.images`` and render with its base name.

        A missing or empty ``src`` is not queued, and a source without
        any ``/`` is used as the file name as it stands.
        """
        src = element.attrib.get('src', '')
        if src:
            ctx.images.append(src)
        # [-1] rather than [1]: a source without '/' (including the ''
        # default) yields a single-item list.
        src = src.rsplit('/', 1)[-1]
        return super(DivImageR, self).render(element, Context(ctx, src=src))

    def container(self, ctx):
        """Return the container pair with ``src`` set on the inner element."""
        root, inner = super(DivImageR, self).container(ctx)
        src = getattr(ctx, 'src', '')
        inner.set('src', src)
        return root, inner
EpubFormat.renderers.register(core.Div, 'img', DivImageR('img'))
299
300
class HeaderR(EpubRenderer):
    """Renders a header; its children are rendered in an inline context."""

    def subcontext(self, element, ctx):
        """Return a context derived from ``ctx`` with ``inline`` switched on."""
        inline_ctx = Context(ctx, inline=True)
        return inline_ctx
EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
305
306
class SectionR(EpubRenderer):
    """Renders a section as a separate part with its own TOC entry."""

    # Tells the parent renderer to start a new part for this element.
    epub_separate = True

    def render(self, element, ctx):
        """Add a TOC entry for every non-root section, then render it.

        Nested sections are rendered with a context whose ``toc`` is the
        entry just added.
        """
        # Open question from the original author: add a 'poczatek'
        # ("beginning") entry for the root section?
        is_root = element.getparent() is None
        if not is_root:
            entry = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
            ctx = Context(ctx, toc=entry)
        return super(SectionR, self).render(element, ctx)
EpubFormat.renderers.register(core.Section, None, SectionR())
317
318
class SpanR(EpubRenderer):
    """Renders a span with the default EPUB renderer behaviour."""
EpubFormat.renderers.register(core.Span, None, SpanR('span'))