b7cf539271f7a8d9a20d2cd4e51ea5d13b57c55a
[librarian.git] / librarian / formats / epub / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import os
7 from copy import deepcopy
8 from tempfile import NamedTemporaryFile
9 import zipfile
10 from lxml import etree
11 from librarian import OPFNS, NCXNS, XHTMLNS
12 from librarian import core
13 from librarian.formats import Format
14 from librarian.formats.cover.wolnelektury import WLCover
15 from librarian.output import OutputFile
16 from librarian.renderers import Register, TreeRenderer, UnknownElement
17 from librarian.utils import Context, get_resource, extend_element
18
19
class EpubFormat(Format):
    """Builds an EPUB file (a zip archive) from a document.

    Class attributes:
        format_name, format_ext: identification of the output format.
        cover: cover class instantiated with the document; ``None``
            disables cover generation.
        renderers: register mapping document elements to the renderer
            objects used by :meth:`render`.
    """
    format_name = 'EPUB'
    format_ext = 'epub'

    cover = WLCover
    renderers = Register()

    def __init__(self, doc, cover=None, with_fonts=True):
        """Remember the build options.

        :param doc: document to convert; passed on to ``Format``.
        :param cover: optional cover class overriding the class default.
        :param with_fonts: stored on the instance; the code visible in
            this class does not consult it.
        """
        super(EpubFormat, self).__init__(doc)
        self.with_fonts = with_fonts
        if cover is not None:
            self.cover = cover

    def build(self):
        """Render the document into a temporary ``.epub`` file.

        :returns: ``OutputFile`` created from the temporary file's name.
        :raises: whatever the rendering raises; in that case the
            partially written temporary file is removed first.
        """
        output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
        try:
            archive = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
            try:
                self._write_package(archive)
            finally:
                # Always finalize the archive so the handle is released.
                archive.close()
        except Exception:
            # delete=False means nobody else would clean this file up.
            output_file.close()
            os.unlink(output_file.name)
            raise
        # ZipFile does not close a file object it was handed.
        output_file.close()
        return OutputFile.from_filename(output_file.name)

    def render(self, element, ctx):
        """Render ``element`` with the renderer registered for it."""
        return self.renderers.get_for(element).render(element, ctx)

    def _write_package(self, archive):
        """Write all members of the EPUB into the open zip ``archive``."""
        opf = etree.parse(get_resource('formats/epub/res/content.opf'))
        manifest = opf.find(OPFNS('manifest'))
        guide = opf.find(OPFNS('guide'))
        spine = opf.find(OPFNS('spine'))

        self._write_container(archive)

        # Bytes literal: lxml refuses unicode input that carries an
        # encoding declaration; on Python 2 this is the same str as before.
        toc_file = etree.fromstring(
            b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
            b'"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
            b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
            b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
            b'</navMap></ncx>')

        # The cover goes first: it may annotate the document root, which
        # has to happen before the document is rendered.
        if self.cover is not None:
            self._write_cover(archive, opf, manifest, spine, guide)

        ctx = Context(format=self)
        ctx.toc = TOC()
        ctx.toc_level = 0
        ctx.footnotes = Footnotes()
        ctx.part_no = 0

        self._write_parts(archive, ctx, manifest, spine)

        # len() rather than truth-testing: asks for the number of children.
        if len(ctx.footnotes.output):
            self._write_footnotes(archive, ctx, manifest, spine)

        archive.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
        # The last child of the NCX skeleton is its <navMap/>.
        ctx.toc.render(toc_file[-1])
        archive.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))

    @staticmethod
    def _write_container(archive):
        """Write the ``mimetype`` member and ``META-INF/container.xml``."""
        mime = zipfile.ZipInfo('mimetype')
        # The mimetype member is stored uncompressed, without extra data.
        mime.compress_type = zipfile.ZIP_STORED
        mime.extra = b''
        archive.writestr(mime, 'application/epub+zip')
        archive.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" '
                         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
                         '<rootfiles><rootfile full-path="OPS/content.opf" '
                         'media-type="application/oebps-package+xml" />'
                         '</rootfiles></container>')

    def _write_cover(self, archive, opf, manifest, spine, guide):
        """Write the cover image and cover page, and list them in the OPF."""
        cover = self.cover(self.doc)
        cover_data = cover.build().get_string()
        cover_name = 'cover.%s' % cover.format_ext
        # Zip member names use forward slashes regardless of platform.
        archive.writestr('OPS/%s' % cover_name, cover_data)
        del cover_data

        cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
        # './/' is the explicit spelling of what ElementTree.find('//...')
        # does, without lxml's FutureWarning about absolute paths.
        cover_tree.find('.//' + XHTMLNS('img')).set('src', cover_name)
        archive.writestr('OPS/cover.html', etree.tostring(
            cover_tree, method="html", pretty_print=True))

        if cover.uses_dc_cover:
            # Copy cover credits from the metadata onto the document root.
            doc_root = self.doc.edoc.getroot()
            for attr_name, meta_name in (('data-cover-by', 'cover_by'),
                                         ('data-cover-source', 'cover_source')):
                value = self.doc.meta.get_one(meta_name)
                if value:
                    doc_root.set(attr_name, value)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
        # The first child of the package root receives the <meta/> entry.
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    def _write_parts(self, archive, ctx, manifest, spine):
        """Render the document and write each non-empty part as a chapter."""
        wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
        for part in self.render(self.doc.edoc.getroot(), ctx):
            # Skip parts with no children and no visible text; the text
            # of a childless element may be None.
            if not len(part) and not (part.text or '').strip():
                continue
            wrap = deepcopy(wrap_tmpl)
            extend_element(wrap.find('.//*[@id="book-text"]'), part)

            partstr = 'part%d' % int(part.get('part_no'))
            manifest.append(manifest.makeelement(OPFNS('item'), attrib={
                'id': partstr,
                'href': partstr + ".html",
                'media-type': 'application/xhtml+xml',
            }))
            spine.append(spine.makeelement(OPFNS('itemref'), attrib={
                'idref': partstr,
            }))
            archive.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))

    @staticmethod
    def _write_footnotes(archive, ctx, manifest, spine):
        """Write the collected footnotes page and list it in TOC and OPF."""
        ctx.toc.add("Przypisy", "footnotes.html")
        manifest.append(etree.Element(
            OPFNS('item'), id='footnotes', href='footnotes.html',
            **{'media-type': "application/xhtml+xml"}))
        # Namespaced like the other itemref/item elements built here.
        spine.append(etree.Element(OPFNS('itemref'), idref='footnotes'))
        wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
        extend_element(wrap.find('.//*[@id="footnotes"]'), ctx.footnotes.output)
        archive.writestr('OPS/footnotes.html', etree.tostring(
            wrap, method="html", pretty_print=True))
131
132
133 # Helpers
134
class EpubRenderer(TreeRenderer):
    """Renders an element's insides as XML in a <_/> container.

    Every container produced here is stamped with the part number that
    is current in the rendering context.
    """

    def container(self, ctx):
        """Return ``(root, inner)`` from the base class, with ``part_no`` set on root."""
        outer, content = super(EpubRenderer, self).container()
        outer.set("part_no", str(ctx.part_no))
        return outer, content

    def render(self, element, ctx):
        """Yield one or more containers holding the rendered ``element``.

        Children whose renderer has a true ``epub_separate`` attribute
        close the current container and start a new part. Children whose
        renderer yields several pieces are merged into the surrounding
        containers at both ends. Children with no registered renderer are
        skipped, but their tail text is still rendered.
        """
        child_ctx = self.subcontext(element, ctx)
        part, body = self.container(ctx)
        if element.text:
            extend_element(body, self.render_text(element.text, ctx))
        for node in element:
            try:
                renderer = ctx.format.renderers.get_for(node)
            except UnknownElement:
                continue
            else:
                if not getattr(renderer, 'epub_separate', False):
                    pieces = list(renderer.render(node, child_ctx))
                    # The first piece continues the container being filled.
                    extend_element(body, pieces[0])
                    if len(pieces) > 1:
                        yield part
                        for piece in pieces[1:-1]:
                            yield piece
                        # The last piece opens the container that follows.
                        part, body = self.container(ctx)
                        extend_element(body, pieces[-1])
                else:
                    yield part
                    ctx.part_no += 1
                    for piece in renderer.render(node, child_ctx):
                        yield piece
                    part, body = self.container(ctx)
            finally:
                # Runs for skipped children too: tail text is never lost.
                if node.tail:
                    extend_element(body, self.render_text(node.tail, ctx))
        yield part
172
173
class Footnotes(object):
    """Accumulates footnote bodies and hands out numbered anchors."""

    def __init__(self):
        # Bare <_/> element that collects all footnote content.
        self.output = etree.Element("_")
        self.counter = 0

    def append(self, items):
        """Add a footnote built from ``items`` and return its in-text anchor.

        ``items`` is a sequence of rendered elements; the ``part_no``
        attribute of the first one selects the page the back-link points to.
        """
        self.counter += 1
        number = self.counter
        label = "[%d]" % number
        source_part = int(items[0].get('part_no'))

        # Link from the footnotes page back to the place of reference.
        backlink = etree.Element(
            "a",
            href="part%d.html#footnote-anchor-%d" % (source_part, number),
            id="footnote-%d" % number,
            style="float:left;margin-right:1em")
        backlink.text = label
        backlink.tail = " "
        self.output.append(backlink)
        for fragment in items:
            extend_element(self.output, fragment)

        # Link placed in the running text, pointing at the footnote.
        reference = etree.Element(
            "a",
            href="footnotes.html#footnote-%d" % number,
            id="footnote-anchor-%d" % number)
        reference.text = label
        return reference
194
195
class TOC(object):
    """Tree of table-of-contents entries sharing one running counter.

    The entry created without ``root`` becomes the root, owns the counter
    and takes number 0; every entry created afterwards takes the next
    number.
    """

    def __init__(self, title=None, href="", root=None):
        """Create an entry.

        ``href`` may contain a ``{counter}`` placeholder, which is filled
        with this entry's number.
        """
        if root is None:
            # This entry is the root and keeps the shared counter.
            root = self
            self.counter = 0
        self.root = root
        self.children = []
        self.title = title
        number = root.counter
        self.href = href.format(counter=number)
        self.number = number
        root.counter = number + 1

    def add(self, title, href):
        """Create a child entry under this one and return it."""
        entry = type(self)(title, href, root=self.root)
        self.children.append(entry)
        return entry

    def render(self, nav_map):
        """Append a ``navPoint`` subtree to ``nav_map`` for each descendant."""
        for entry in self.children:
            point = etree.Element(NCXNS('navPoint'))
            point.set('id', 'NavPoint-%d' % entry.number)
            point.set('playOrder', str(entry.number))

            label = etree.Element(NCXNS('navLabel'))
            label_text = etree.Element(NCXNS('text'))
            label_text.text = entry.title
            label.append(label_text)
            point.append(label)

            target = etree.Element(NCXNS('content'))
            target.set('src', entry.href)
            point.append(target)

            nav_map.append(point)
            # Nested entries go inside this navPoint.
            entry.render(point)
230             child.render(nav_point)
231
232
233 # Renderers
234
class AsideR(EpubRenderer):
    """Renders an aside as a footnote, leaving only an anchor in the text."""

    def render(self, element, ctx):
        """Store the rendered aside in ``ctx.footnotes`` and yield its anchor."""
        rendered = list(super(AsideR, self).render(element, ctx))
        footnote_link = ctx.footnotes.append(rendered)
        # The wrapper comes from text_container(), not from container(ctx).
        holder, holder_inside = self.text_container()
        holder_inside.append(footnote_link)
        yield holder


EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
243
244
class DivR(EpubRenderer):
    """Renderer registered for ``core.Div``."""

    def container(self, ctx):
        """Return the usual container, as a block-styled span when inline.

        When the context has a true ``inline`` attribute, the inner
        element is retagged ``span`` and given ``display: block``.
        """
        root, inner = super(DivR, self).container(ctx)
        if not getattr(ctx, 'inline', False):
            return root, inner
        inner.tag = 'span'
        inner.set('style', 'display: block;')
        return root, inner


EpubFormat.renderers.register(core.Div, None, DivR('div'))
253
254
class HeaderR(EpubRenderer):
    """Renderer registered for ``core.Header``."""

    def subcontext(self, element, ctx):
        """Return a context derived from ``ctx`` with ``inline`` set to True."""
        inline_ctx = Context(ctx, inline=True)
        return inline_ctx


EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
259
260
class SectionR(EpubRenderer):
    """Renderer registered for ``core.Section``; each section starts a new part."""

    # Checked by EpubRenderer.render to start a separate part.
    epub_separate = True

    def render(self, element, ctx):
        """Register a TOC entry for a non-root section, then render it.

        The TOC entry is added when this method is called, before the
        returned generator is consumed.
        """
        # Open question from the original author: add a 'poczatek' entry?
        is_root = element.getparent() is None
        if not is_root:
            entry = ctx.toc.add(element.meta.title(), 'part%d.html' % ctx.part_no)
            # Nested sections attach their entries under this one.
            ctx = Context(ctx, toc=entry)
        return super(SectionR, self).render(element, ctx)


EpubFormat.renderers.register(core.Section, None, SectionR())
271
272
class SpanR(EpubRenderer):
    """Renderer registered for ``core.Span``; adds nothing to EpubRenderer."""


EpubFormat.renderers.register(core.Span, None, SpanR('span'))