stupid bug in epub
[librarian.git] / librarian / formats / html / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import re
7 from lxml import etree
8 from librarian.formats import Format
9 from librarian.output import OutputFile
10 from librarian.renderers import Register, TreeRenderer
11 from librarian.utils import Context, get_resource
12 from librarian import core
13
14
class HtmlFormat(Format):
    """Output format that renders a document into an HTML page.

    The page skeleton is loaded from a bundled template resource; the
    rendered document and the collected footnotes are inserted into the
    template's ``content`` and ``footnotes`` divs.
    """
    format_name = 'HTML'
    format_ext = 'html'

    # Registry mapping document node kinds to the renderers defined in
    # this module.
    renderers = Register()

    def __init__(self, doc, standalone=False):
        """Store the document and whether the standalone template is used."""
        super(HtmlFormat, self).__init__(doc)
        self.standalone = standalone

    def build(self, files_path=None):
        """Render the whole document and return it as an ``OutputFile``.

        ``files_path`` is stored on the rendering context; renderers in
        this module prepend it to ``file://`` references.
        """
        template_name = (
            "formats/html/res/html_standalone.html" if self.standalone
            else "formats/html/res/html.html")
        template = get_resource(template_name)
        page = etree.parse(template)

        # Fresh per-build state shared by all renderers.
        ctx = Context(format=self)
        ctx.files_path = files_path
        ctx.toc = TOC()
        ctx.toc_level = 0
        ctx.footnotes = Footnotes()

        if self.standalone:
            # Only the standalone template gets its <title> filled in.
            page.find('head/title').text = u"%s (%s)" % (
                self.doc.meta.title(), self.doc.meta.author())

        content_div = page.find('.//div[@id="content"]')
        content_div.extend(self.render(self.doc.edoc.getroot(), ctx))
        # NOTE: the collected table of contents (ctx.toc) is currently not
        # inserted into the page; the div#toc insertion is disabled.
        footnotes_div = page.find('.//div[@id="footnotes"]')
        footnotes_div.extend(ctx.footnotes.output)

        serialized = etree.tostring(page, encoding='utf-8', method="html")
        return OutputFile.from_string(serialized)

    def render(self, element, ctx):
        """Render ``element`` with the renderer registered for it."""
        renderer = self.renderers.get_for(element)
        return renderer.render(element, ctx)
51
52
53 # Helpers
54
class NaturalText(TreeRenderer):
    """Renderer for running text that glues one-letter words forward.

    A space that follows a single word character which is itself preceded
    by whitespace is replaced with an ``&nbsp;`` entity, so the one-letter
    word is not separated from the text after it.
    """

    def render_text(self, text, ctx):
        """Return a text container holding ``text`` with nbsp entities.

        Note that a one-letter word at the very start of ``text`` has no
        preceding whitespace and is therefore not matched by the pattern.
        """
        root, inner = self.text_container()
        # Raw string: '\s' in a plain literal is an invalid escape sequence
        # (warned about by newer Python versions); the pattern is unchanged.
        chunks = re.split(r'(?<=\s\w) ', text)
        inner.text = chunks[0]
        for chunk in chunks[1:]:
            # Each split point becomes an entity whose tail carries the
            # text that followed the removed space.
            nbsp = etree.Entity("nbsp")
            nbsp.tail = chunk
            inner.append(nbsp)
        return root
65
66
class LiteralText(TreeRenderer):
    """Renderer that uses the inherited ``TreeRenderer`` behaviour as is.

    Unlike ``NaturalText`` it adds no non-breaking-space handling.
    """
69
70
class Silent(TreeRenderer):
    """Renderer that discards the text it is given."""

    def render_text(self, text, ctx):
        """Return an empty text container; ``text`` is never attached."""
        empty_root, _ = self.text_container()
        return empty_root
75
76
class Footnotes(object):
    """Accumulates footnote bodies and numbers them sequentially."""

    def __init__(self):
        # Number of footnotes registered so far.
        self.counter = 0
        # Placeholder wrapper element; footnote markup is collected as its
        # children.
        self.output = etree.Element("_")

    def append(self, item):
        """Register a footnote and return the in-text reference link.

        A numbered back-link followed by the children of ``item`` is added
        to ``self.output``. The returned ``<a>`` element points at that
        back-link and is itself the target the back-link points to.
        """
        self.counter += 1
        number = self.counter
        label = "[%d]" % number

        # Marker placed in the footnotes area, linking back into the text.
        backlink = etree.SubElement(
            self.output,
            "a",
            href="#footnote-anchor-%d" % number,
            id="footnote-%d" % number,
            style="float:left;margin-right:1em")
        backlink.text = label
        backlink.tail = " "
        self.output.extend(item)

        # Reference placed in the text, linking to the footnote marker.
        reference = etree.Element(
            "a",
            id="footnote-anchor-%d" % number,
            href="#footnote-%d" % number)
        reference.text = label
        return reference
99
100
class TOC(object):
    """Collects section titles and renders them as nested ``<ul>`` lists."""

    def __init__(self):
        # Entries as (level, title, sequence number) in insertion order.
        self.items = []
        self.counter = 0

    def add(self, title, level=0):
        """Record an entry and return its 1-based sequence number."""
        self.counter += 1
        entry = (level, title, self.counter)
        self.items.append(entry)
        return self.counter

    def render(self):
        """Build and return the ``<ul id="toc">`` element for all entries.

        Every step down in level opens a nested ``<ul>`` under the current
        list; every step up returns to the parent list. Each entry links
        to ``#sect<number>``.
        """
        toc_root = etree.Element("ul", id="toc")
        parent = toc_root
        depth = 0
        for level, title, counter in self.items:
            # Descend: open one nested list per missing level.
            while depth < level:
                parent = etree.SubElement(parent, "ul")
                depth += 1
            # Ascend: climb back to the list that owns this level.
            while depth > level:
                parent = parent.getparent()
                depth -= 1
            entry = etree.SubElement(parent, "li")
            link = etree.SubElement(entry, "a", href="#sect%d" % counter)
            link.text = title
        return toc_root
129
130
131 # Renderers
132
# Plain asides render as <aside>; asides marked 'comment' produce no text.
HtmlFormat.renderers.register(core.Aside, None, NaturalText('aside'))
HtmlFormat.renderers.register(core.Aside, 'comment', Silent())


class AsideFootnote(NaturalText):
    """Moves an aside's content to the footnotes and leaves a reference."""

    def render(self, element, ctx):
        """Return a container holding only the footnote reference link.

        The normally rendered content is handed to ``ctx.footnotes``, which
        returns the numbered link placed in the text instead.
        """
        footnote_body = super(AsideFootnote, self).render(element, ctx)
        reference = ctx.footnotes.append(footnote_body)
        wrapper, slot = self.container()
        slot.append(reference)
        return wrapper


HtmlFormat.renderers.register(core.Aside, 'footnote', AsideFootnote())
145
146
class Header(NaturalText):
    """Renders headers, adapting the markup to the current TOC level."""

    def render(self, element, ctx):
        """Return the rendered header.

        At ``ctx.toc_level == 1`` the first rendered child is moved into a
        new ``div.page-header``. At any other level that child is retagged
        as ``h2`` and, if it has leading text, gets an empty ``<a>`` whose
        ``id`` is that text.
        """
        root = super(Header, self).render(element, ctx)

        if ctx.toc_level != 1:
            heading = root[0]
            heading.tag = 'h2'
            if heading.text:
                # NOTE(review): 'pointer: hand' is not a standard CSS
                # property; the style string is kept exactly as it was.
                etree.SubElement(
                    heading, 'a',
                    {'id': heading.text, 'style': 'pointer: hand; color:#ddd; font-size:.8em'})
            return root

        # The wrapper is appended last, so root[0] is still the heading
        # when it gets moved inside the wrapper.
        page_header = etree.SubElement(root, 'div', {'class': "page-header"})
        page_header.insert(0, root[0])
        return root


HtmlFormat.renderers.register(core.Header, None, Header('h1'))
163
164
HtmlFormat.renderers.register(core.Div, None, NaturalText('div'))


class DivDefined(NaturalText):
    """Renders a defined term, suffixing it with a colon."""

    def render(self, element, ctx):
        """Return the rendered term with ':' appended and an ``id`` set.

        The ``id`` attribute is the final text of the term, including the
        appended colon.
        """
        output = super(DivDefined, self).render(element, ctx)
        term = output[0]
        term.text = (term.text or '') + ':'
        # The id reuses the visible text verbatim (colon included).
        term.set('id', term.text)
        return output


HtmlFormat.renderers.register(core.Div, 'defined', DivDefined('dt', {'style': 'display: inline-block'}))
176
177
class DivImage(NaturalText):
    """Renders an image div as a centred block-level ``<img>``."""

    def render(self, element, ctx):
        """Return the rendered image with ``src`` and ``style`` set.

        A ``file://`` source has that prefix replaced by
        ``ctx.files_path``; any other source is used unchanged.
        """
        output = super(DivImage, self).render(element, ctx)
        image = output[0]

        prefix = 'file://'
        location = element.attrib.get('src', '')
        if location.startswith(prefix):
            # NOTE(review): ctx.files_path may be None when build() is
            # called without files_path, in which case this concatenation
            # raises -- confirm callers always pass it for file:// sources.
            location = ctx.files_path + location[len(prefix):]

        image.set('src', location)
        image.set('style', 'display: block; width: 60%; margin: 3em auto')
        return output


HtmlFormat.renderers.register(core.Div, 'img', DivImage('img'))
189
190
class DivVideo(NaturalText):
    """Renders a video div as an embedded YouTube player element."""

    def render(self, element, ctx):
        """Return the rendered element with the player attributes set.

        The video is identified by the source element's ``videoid``
        attribute (empty string when absent), which is substituted into
        the embed URL.
        """
        output = super(DivVideo, self).render(element, ctx)
        video_id = element.attrib.get('videoid', '')
        attribs = {
            'width': '854',
            'height': '480',
            'src': '//www.youtube.com/embed/%s?controls=2&amp;rel=0&amp;showinfo=0&amp;theme=light' % video_id,
            'frameborder': '0',
            'allowfullscreen': '',
        }
        # items() is available on both Python 2 and Python 3, whereas
        # iteritems() exists only on Python 2 dicts.
        for attrib, value in attribs.items():
            output[0].attrib[attrib] = value
        return output


HtmlFormat.renderers.register(core.Div, 'video', DivVideo('iframe'))
207
# List items (as div or span) and the plain / enumerated list containers.
HtmlFormat.renderers.register(core.Div, 'item', NaturalText('li'))
HtmlFormat.renderers.register(core.Span, 'item', NaturalText('li'))
HtmlFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
HtmlFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))


class DivListDefinitions(NaturalText):
    """Renderer for definition lists; currently a plain pass-through."""

    def render(self, element, ctx):
        """Return the inherited rendering unchanged.

        A right-float style for ``ctx.toc_level > 2`` used to be sketched
        here but is disabled, so no extra attributes are applied.
        """
        return super(DivListDefinitions, self).render(element, ctx)


HtmlFormat.renderers.register(core.Div, 'list.definitions', DivListDefinitions('ul'))
HtmlFormat.renderers.register(core.Div, 'p', NaturalText('p'))
223
224
class Section(NaturalText):
    """Renders a section and records it in the table of contents."""

    def subcontext(self, element, ctx):
        """Return a child context one TOC level deeper than ``ctx``."""
        deeper_level = ctx.toc_level + 1
        return Context(ctx, toc_level=deeper_level)

    def render(self, element, ctx):
        """Return the rendered section with a ``sect<number>`` id.

        The section is added to ``ctx.toc`` before delegating to the
        inherited renderer; the id uses the number the TOC returned, which
        is the same number ``TOC.render`` uses in its ``#sect`` links.
        """
        section_title = element.meta.title()
        section_number = ctx.toc.add(section_title, ctx.toc_level)
        root = super(Section, self).render(element, ctx)
        root[0].attrib["id"] = "sect%d" % section_number
        return root


HtmlFormat.renderers.register(core.Section, None, Section('section'))
235
236
# Inline spans: default, citations, code and the two emphasis kinds.
HtmlFormat.renderers.register(core.Span, None, NaturalText('span'))
HtmlFormat.renderers.register(core.Span, 'cite', NaturalText('cite'))
HtmlFormat.renderers.register(core.Span, 'cite.code', LiteralText('code'))
HtmlFormat.renderers.register(core.Span, 'emph', NaturalText('em'))
HtmlFormat.renderers.register(core.Span, 'emp', NaturalText('strong'))


class SpanUri(LiteralText):
    """Renders a URI span as a link whose target is the span's own text."""

    def render(self, element, ctx):
        """Return the rendered link with ``href`` set to ``element.text``."""
        rendered = super(SpanUri, self).render(element, ctx)
        link = rendered[0]
        link.set('href', element.text)
        return rendered


HtmlFormat.renderers.register(core.Span, 'uri', SpanUri('a'))
250
251
class SpanLink(LiteralText):
    """Renders a link span using the source element's ``href`` attribute."""

    def render(self, element, ctx):
        """Return the rendered link with its ``href`` resolved.

        A ``file://`` target has that prefix replaced by
        ``ctx.files_path``; any other target (or a missing one, treated as
        an empty string) is used unchanged.
        """
        rendered = super(SpanLink, self).render(element, ctx)

        prefix = 'file://'
        target = element.attrib.get('href', '')
        if target.startswith(prefix):
            # NOTE(review): ctx.files_path may be None when build() is
            # called without files_path, in which case this concatenation
            # raises -- confirm callers always pass it for file:// links.
            target = ctx.files_path + target[len(prefix):]

        rendered[0].set('href', target)
        return rendered


HtmlFormat.renderers.register(core.Span, 'link', SpanLink('a'))