Some experiments with the language: html, epub, covers.
[librarian.git] / librarian / formats / epub / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import os
7 from copy import deepcopy
8 from tempfile import NamedTemporaryFile
9 import zipfile
10 from lxml import etree
11 from librarian import OPFNS, NCXNS, XHTMLNS
12 from librarian import core
13 from librarian.formats import Format
14 from librarian.formats.cover.wolnelektury import WLCover
15 from librarian.output import OutputFile
16 from librarian.renderers import Register, TreeRenderer, UnknownElement
17 from librarian.utils import Context, get_resource, extend_element
18
19
class EpubFormat(Format):
    """Builds an EPUB archive from a document.

    The archive is assembled from the resource templates under
    ``formats/epub/res/`` plus the parts produced by the registered
    renderers.

    ``cover`` is the cover class used to generate the cover image; it can be
    overridden per instance through the constructor.  ``with_fonts`` is
    stored on the instance; ``build()`` as written here does not read it.
    """
    format_name = 'EPUB'
    format_ext = 'epub'

    cover = WLCover
    renderers = Register()

    # Static content of META-INF/container.xml.
    _CONTAINER_XML = (
        '<?xml version="1.0" ?><container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>')

    # Empty NCX document; its last child (<navMap>) receives the TOC entries.
    _NCX_SKELETON = (
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>')

    def __init__(self, doc, cover=None, with_fonts=True):
        """Store the document and optional overrides.

        :param doc: document to convert; passed to the base ``Format``.
        :param cover: cover class replacing the class-level default when
            not None.
        :param with_fonts: stored as ``self.with_fonts``.
        """
        super(EpubFormat, self).__init__(doc)
        self.with_fonts = with_fonts
        if cover is not None:
            self.cover = cover

    def build(self):
        """Write the EPUB to a temporary file and return it as an OutputFile.

        The temporary file is created with ``delete=False``, so it stays on
        disk after this method returns; the archive and the file handle are
        closed even when rendering fails.
        """
        opf = etree.parse(get_resource('formats/epub/res/content.opf'))
        manifest = opf.find(OPFNS('manifest'))
        guide = opf.find(OPFNS('guide'))
        spine = opf.find(OPFNS('spine'))

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
        try:
            archive = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
            try:
                self._write_container(archive)

                toc_file = etree.fromstring(self._NCX_SKELETON)
                nav_map = toc_file[-1]

                if self.cover is not None:
                    self._write_cover(archive, opf, manifest, spine, guide)

                ctx = Context(format=self)
                ctx.toc = TOC()
                ctx.toc_level = 0
                ctx.footnotes = Footnotes()
                ctx.part_no = 0

                self._write_parts(archive, ctx, manifest, spine)
                if len(ctx.footnotes.output):
                    self._write_footnotes(archive, ctx, manifest, spine)

                # The package file and the TOC go last: the steps above keep
                # adding manifest/spine entries and TOC items.
                archive.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True))
                ctx.toc.render(nav_map)
                archive.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True))
            finally:
                archive.close()
        finally:
            # ZipFile.close() does not close a file object it was handed.
            output_file.close()
        return OutputFile.from_filename(output_file.name)

    def _write_container(self, archive):
        """Write the uncompressed ``mimetype`` entry and the container file."""
        mime = zipfile.ZipInfo()
        mime.filename = 'mimetype'
        mime.compress_type = zipfile.ZIP_STORED
        mime.extra = ''
        archive.writestr(mime, 'application/epub+zip')
        archive.writestr('META-INF/container.xml', self._CONTAINER_XML)

    def _write_cover(self, archive, opf, manifest, spine, guide):
        """Write the cover image and cover page and register them in the OPF."""
        cover = self.cover(self.doc)
        cover_name = 'cover.%s' % cover.format_ext
        # Same 'OPS/' prefix style as every other entry in the archive.
        archive.writestr('OPS/' + cover_name, cover.build().get_string())

        cover_tree = etree.parse(get_resource('formats/epub/res/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        archive.writestr('OPS/cover.html', etree.tostring(
                            cover_tree, method="html", pretty_print=True))

        if cover.uses_dc_cover:
            # Copy cover credits onto the document root before it is
            # rendered.  (The original referenced an undefined name
            # `document` here, which raised NameError.)
            doc_root = self.doc.edoc.getroot()
            cover_by = self.doc.meta.get_one('cover_by')
            if cover_by:
                doc_root.set('data-cover-by', cover_by)
            cover_source = self.doc.meta.get_one('cover_source')
            if cover_source:
                doc_root.set('data-cover-source', cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover" linear="no" />'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    def _write_parts(self, archive, ctx, manifest, spine):
        """Render the document and write each non-empty part as its own page."""
        wrap_tmpl = etree.parse(get_resource('formats/epub/res/chapter.html'))
        for part in self.render(self.doc.edoc.getroot(), ctx):
            # Skip parts with neither children nor text; `text` may be None.
            if not len(part) and not (part.text or '').strip():
                continue
            wrap = deepcopy(wrap_tmpl)
            extend_element(wrap.find('//*[@id="book-text"]'), part)

            partstr = 'part%d' % int(part.get('part_no'))
            manifest.append(manifest.makeelement(OPFNS('item'), attrib={
                                 'id': partstr,
                                 'href': partstr + ".html",
                                 'media-type': 'application/xhtml+xml',
                             }))
            spine.append(spine.makeelement(OPFNS('itemref'), attrib={
                        'idref': partstr,
                    }))
            archive.writestr('OPS/%s.html' % partstr, etree.tostring(wrap, method='html'))

    def _write_footnotes(self, archive, ctx, manifest, spine):
        """Write the collected footnotes page and register it in OPF and TOC."""
        ctx.toc.add("Przypisy", "footnotes.html")
        manifest.append(etree.Element(OPFNS('item'),
                id='footnotes', href='footnotes.html',
                **{'media-type': "application/xhtml+xml"}))
        spine.append(etree.Element('itemref', idref='footnotes'))
        wrap = etree.parse(get_resource('formats/epub/res/footnotes.html'))
        extend_element(wrap.find('//*[@id="footnotes"]'), ctx.footnotes.output)
        archive.writestr('OPS/footnotes.html', etree.tostring(
                            wrap, method="html", pretty_print=True))

    def render(self, element, ctx):
        """Render ``element`` with the renderer registered for it."""
        return self.renderers.get_for(element).render(element, ctx)
133
134
135 # Helpers
136
class EpubRenderer(TreeRenderer):
    """ Renders insides as XML in a <_/> container.

    Rendering is a generator: one element may come out as several output
    wrappers ("parts"), each stamped with the ``part_no`` that was current
    in the context when the wrapper was created.
    """
    def container(self, ctx):
        # Build the container via the parent class, then record the current
        # part number on the root.
        root, inner = super(EpubRenderer, self).container()
        root.set("part_no", str(ctx.part_no))
        return root, inner

    def render(self, element, ctx):
        """Yield output wrappers for ``element`` and its children.

        Children are rendered with the sub-context returned by
        ``self.subcontext(element, ctx)``.  At least one wrapper is always
        yielded (the final ``yield wrapper``).
        """
        subctx = self.subcontext(element, ctx)
        wrapper, inside = self.container(ctx)
        if element.text:
            extend_element(inside, self.render_text(element.text, ctx))
        for child in element:
            try:
                child_renderer = ctx.format.renderers.get_for(child)
            except UnknownElement:
                # No renderer for this child: drop it.  Its tail text is
                # still emitted by the `finally` clause below.
                continue
            else:
                if getattr(child_renderer, 'epub_separate', False):
                    # The child starts a new part: flush what was collected
                    # so far, bump the part counter, pass the child's own
                    # parts through, then open a fresh container.
                    yield wrapper
                    ctx.part_no += 1
                    for child_part in child_renderer.render(child, subctx):
                        yield child_part
                    wrapper, inside = self.container(ctx)
                else:
                    child_parts = list(child_renderer.render(child, subctx))
                    # The first part continues the current container.
                    extend_element(inside, child_parts[0])
                    if len(child_parts) > 1:
                        # The child was split: flush the current wrapper,
                        # pass the middle parts through unchanged, and carry
                        # on in a new container seeded with the last part.
                        yield wrapper
                        for child_part in child_parts[1:-1]:
                            yield child_part
                        wrapper, inside = self.container(ctx)
                        extend_element(inside, child_parts[-1])
            finally:
                # Tail text goes into whichever container is current now,
                # i.e. the new one if the child caused a split.
                if child.tail:
                    extend_element(inside, self.render_text(child.tail, ctx))
        yield wrapper
174
175
class Footnotes(object):
    """Collects footnote bodies and hands out numbered in-text anchors."""

    def __init__(self):
        # `counter` is the number of the most recently added footnote;
        # `output` accumulates the content of the footnotes page.
        self.counter = 0
        self.output = etree.Element("_")

    def append(self, items):
        """Add a footnote made of ``items``; return the in-text anchor.

        A back-link pointing at the part named by the first item's
        ``part_no`` attribute is appended to ``self.output``, followed by
        the content of every item.
        """
        self.counter += 1
        number = self.counter
        label = "[%d]" % number
        source_part = int(items[0].get('part_no'))

        backlink = etree.Element("a",
            href="part%d.html#footnote-anchor-%d" % (source_part, number),
            id="footnote-%d" % number,
            style="float:left;margin-right:1em")
        backlink.text = label
        backlink.tail = " "
        self.output.append(backlink)

        for rendered in items:
            extend_element(self.output, rendered)

        in_text = etree.Element("a",
            id="footnote-anchor-%d" % number,
            href="footnotes.html#footnote-%d" % number)
        in_text.text = label
        return in_text
197
198
class TOC(object):
    """Node of a table-of-contents tree.

    The root node owns a counter shared by the whole tree; every node
    (the root included) takes the next value as its ``number``.
    """

    def __init__(self, title=None, href="", root=None):
        if root is None:
            # This node is the root, so it owns the shared counter.
            root = self
            self.counter = 0
        self.root = root
        self.children = []
        self.title = title
        current = root.counter
        # `href` may contain a `{counter}` placeholder.
        self.href = href.format(counter=current)
        self.number = current
        root.counter = current + 1

    def add(self, title, href):
        """Create a child entry under this node and return it."""
        entry = type(self)(title, href, root=self.root)
        self.children.append(entry)
        return entry

    def _nav_point(self, entry):
        """Build the NCX navPoint element describing ``entry``."""
        point = etree.Element(NCXNS('navPoint'))
        point.set('id', 'NavPoint-%d' % entry.number)
        point.set('playOrder', str(entry.number))

        caption = etree.Element(NCXNS('text'))
        caption.text = entry.title
        label = etree.Element(NCXNS('navLabel'))
        label.append(caption)
        point.append(label)

        target = etree.Element(NCXNS('content'))
        target.set('src', entry.href)
        point.append(target)
        return point

    def render(self, nav_map):
        """Append a navPoint for every descendant directly to ``nav_map``.

        Nested entries are appended to the same ``nav_map`` element, right
        after their parent entry.
        """
        for entry in self.children:
            nav_map.append(self._nav_point(entry))
            entry.render(nav_map)
234
235
236 # Renderers
237
class AsideR(EpubRenderer):
    """Renders an aside as a footnote: its content goes to the footnotes
    collector and only the returned anchor stays in the text flow."""

    def render(self, element, ctx):
        # Render the aside fully first; the collector reads the part number
        # from the first rendered piece.
        rendered = list(super(AsideR, self).render(element, ctx))
        footnote_link = ctx.footnotes.append(rendered)
        holder, holder_inside = self.text_container()
        holder_inside.append(footnote_link)
        yield holder
EpubFormat.renderers.register(core.Aside, None, AsideR('div'))
246
247
class DivR(EpubRenderer):
    """Block renderer that turns into a block-styled <span> when the
    context has a truthy ``inline`` attribute."""

    def container(self, ctx):
        root, inner = super(DivR, self).container(ctx)
        if not getattr(ctx, 'inline', False):
            return root, inner
        # Inline context: swap the tag for a span, keep block display.
        inner.tag = 'span'
        inner.set('style', 'display: block;')
        return root, inner
EpubFormat.renderers.register(core.Div, None, DivR('div'))
256
257
class HeaderR(EpubRenderer):
    """Header renderer whose sub-context has ``inline`` set to True."""

    def subcontext(self, element, ctx):
        # Build a context on top of `ctx` with inline=True.
        inline_ctx = Context(ctx, inline=True)
        return inline_ctx
EpubFormat.renderers.register(core.Header, None, HeaderR('h1'))
262
263
class SectionR(EpubRenderer):
    """Section renderer; flagged ``epub_separate`` so the parent renderer
    starts a new part for it."""
    epub_separate = True

    def render(self, element, ctx):
        # Open question from the original author: add a 'poczatek'
        # ("beginning") entry?
        is_root = element.getparent() is None
        if not is_root:
            # Non-root sections get a TOC entry pointing at the current
            # part; entries added further down nest under it.
            part_href = 'part%d.html' % ctx.part_no
            ctx = Context(ctx, toc=ctx.toc.add(element.meta.title(), part_href))
        return super(SectionR, self).render(element, ctx)
EpubFormat.renderers.register(core.Section, None, SectionR())
274
275
class SpanR(EpubRenderer):
    """Span renderer; uses the EpubRenderer behaviour unchanged."""
EpubFormat.renderers.register(core.Span, None, SpanR('span'))
279