images in epub
[librarian.git] / librarian / formats / html / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import re
7 from lxml import etree
8 from librarian.formats import Format
9 from librarian.output import OutputFile
10 from librarian.renderers import Register, TreeRenderer
11 from librarian.utils import Context, get_resource
12 from librarian import core
13
14
class HtmlFormat(Format):
    """Output format that renders a document into an HTML page.

    The page skeleton comes from a bundled template; the rendered document
    tree is placed in the ``div`` with id ``content`` and the collected
    footnotes in the ``div`` with id ``footnotes``.
    """
    format_name = 'HTML'
    format_ext = 'html'

    # Registry of element renderers used by render().
    renderers = Register()

    def __init__(self, doc, standalone=False):
        """Remember whether the standalone template should be used."""
        super(HtmlFormat, self).__init__(doc)
        self.standalone = standalone

    def build(self, files_path=None):
        """Render the document and return it wrapped in an OutputFile.

        ``files_path`` is stored on the rendering context; renderers in this
        module prepend it to ``file://`` references.
        """
        template_path = get_resource(
            "formats/html/res/html_standalone.html" if self.standalone
            else "formats/html/res/html.html")
        page = etree.parse(template_path)

        # Fresh rendering state for this build.
        ctx = Context(format=self)
        ctx.files_path = files_path
        ctx.toc = TOC()
        ctx.toc_level = 0
        ctx.footnotes = Footnotes()

        if self.standalone:
            # Only the standalone template gets its <title> filled in.
            page_title = u"%s (%s)" % (
                self.doc.meta.title(), self.doc.meta.author())
            page.find('head/title').text = page_title

        content_div = page.find('.//div[@id="content"]')
        content_div.extend(self.render(self.doc.edoc.getroot(), ctx))
        # The table of contents is collected in ctx.toc but not inserted:
        #page.find('.//div[@id="toc"]').append(ctx.toc.render())
        footnotes_div = page.find('.//div[@id="footnotes"]')
        footnotes_div.extend(ctx.footnotes.output)

        serialized = etree.tostring(page, encoding='utf-8', method="html")
        return OutputFile.from_string(serialized)

    def render(self, element, ctx):
        """Render ``element`` with the renderer registered for it."""
        renderer = self.renderers.get_for(element)
        return renderer.render(element, ctx)
51
52
53 # Helpers
54
class NaturalText(TreeRenderer):
    """Renderer for running text that protects one-letter words.

    A space that follows a single word character, itself preceded by
    whitespace, is replaced with an ``&nbsp;`` entity.
    """

    def render_text(self, text, ctx):
        """Return a text container holding ``text`` with nbsp entities.

        ``text`` is split at every space that follows a one-character word;
        the pieces are re-joined with ``nbsp`` entity nodes appended to the
        inner element obtained from ``self.text_container()``.
        """
        root, inner = self.text_container()
        # Raw string: '\s' and '\w' are regex classes, not string escapes.
        chunks = re.split(r'(?<=\s\w) ', text)
        inner.text = chunks[0]
        for chunk in chunks[1:]:
            # Each entity stands in for the space that was split away; the
            # following text rides on the entity's tail.
            nbsp = etree.Entity("nbsp")
            nbsp.tail = chunk
            inner.append(nbsp)
        return root
65
66
class LiteralText(TreeRenderer):
    """Renderer that adds nothing to the behaviour of TreeRenderer."""
69
70
class Silent(TreeRenderer):
    """Renderer that discards the text it is asked to render."""

    def render_text(self, text, ctx):
        """Return an untouched text container; ``text`` is ignored."""
        container, _inner = self.text_container()
        return container
75
76
class Footnotes(object):
    """Collects footnote bodies and hands out numbered reference anchors."""

    def __init__(self):
        # Throw-away wrapper element; only its children are used by callers.
        self.output = etree.Element("_")
        self.counter = 0

    def append(self, item):
        """Store the children of ``item`` as the next footnote.

        A back-link pointing at the in-text anchor is placed in front of the
        footnote body. Returns the ``<a>`` element to be put in the text; it
        links forward to the stored footnote.
        """
        self.counter += 1
        number = self.counter
        label = "[%d]" % number

        backlink = etree.Element("a",
            href="#footnote-anchor-%d" % number,
            id="footnote-%d" % number,
            style="float:left;margin-right:1em")
        backlink.text = label
        backlink.tail = " "
        self.output.append(backlink)
        # extend() moves the children of ``item`` into the output wrapper.
        self.output.extend(item)

        reference = etree.Element("a",
            id="footnote-anchor-%d" % number,
            href="#footnote-%d" % number)
        reference.text = label
        return reference
97
98
class TOC(object):
    """Accumulates section titles and renders them as nested ``<ul>`` lists."""

    def __init__(self):
        self.counter = 0
        # Entries are (level, title, number) tuples in insertion order.
        self.items = []

    def add(self, title, level=0):
        """Record an entry and return the number assigned to it."""
        self.counter += 1
        number = self.counter
        self.items.append((level, title, number))
        return number

    def render(self):
        """Build and return a ``<ul id="toc">`` element for all entries.

        Each entry becomes ``<li><a href="#sectN">title</a></li>``; a deeper
        level opens a new ``<ul>`` inside the current list, a shallower one
        climbs back to the parent list.
        """
        toc_root = etree.Element("ul", id="toc")
        depth = 0
        current_list = toc_root
        for level, title, counter in self.items:
            # Descend: open one nested list per missing level.
            while depth < level:
                current_list = etree.SubElement(current_list, "ul")
                depth += 1
            # Ascend: return to the enclosing list.
            while depth > level:
                current_list = current_list.getparent()
                depth -= 1
            entry = etree.SubElement(current_list, "li")
            link = etree.SubElement(entry, "a", href="#sect%d" % counter)
            link.text = title
        return toc_root
127
128
129 # Renderers
130
# Renderers for core.Aside. The second argument presumably selects the
# element variant (None looks like the fallback) -- confirm against Register.
HtmlFormat.renderers.register(core.Aside, None, NaturalText('aside'))
# Asides registered under 'comment' use Silent, whose render_text discards
# the text it is given.
HtmlFormat.renderers.register(core.Aside, 'comment', Silent())
133
class AsideFootnote(NaturalText):
    """Renders a footnote aside as a numbered reference in the text.

    The rendered body is handed to ``ctx.footnotes``; only the reference
    anchor returned by it stays in the main flow.
    """

    def render(self, element, ctx):
        """Return a container holding just the footnote reference anchor."""
        rendered_note = super(AsideFootnote, self).render(element, ctx)
        reference = ctx.footnotes.append(rendered_note)
        wrapper, slot = self.container()
        slot.append(reference)
        return wrapper
HtmlFormat.renderers.register(core.Aside, 'footnote', AsideFootnote())
142
143
class Header(NaturalText):
    """Renders headers, treating the header at TOC level 1 specially."""

    def render(self, element, ctx):
        """Return the rendered header, adjusted according to ctx.toc_level.

        At level 1 the heading is moved into a ``div.page-header``. At any
        other level the heading becomes an ``h2`` and, if it has leading
        text, gets an ``<a>`` child whose id is that text.
        """
        root = super(Header, self).render(element, ctx)
        if ctx.toc_level == 1:
            wrapper = etree.SubElement(root, 'div', {'class': "page-header"})
            # Inserting the heading into the wrapper moves it out of root.
            wrapper.insert(0, root[0])
            return root

        heading = root[0]
        heading.tag = 'h2'
        if heading.text:
            # The anchor carries no text of its own.
            etree.SubElement(heading, 'a', {'id': heading.text, 'style': 'pointer: hand; color:#ddd; font-size:.8em'})
        return root


HtmlFormat.renderers.register(core.Header, None, Header('h1'))
159
160
# core.Div registered with None (presumably the fallback variant -- confirm
# against Register) uses a NaturalText renderer built with 'div'.
HtmlFormat.renderers.register(core.Div, None, NaturalText('div'))
162
class DivDefined(NaturalText):
    """Renders a defined term: appends a colon and uses the text as its id."""

    def render(self, element, ctx):
        """Return the rendered term with ':' appended and an id attribute."""
        output = super(DivDefined, self).render(element, ctx)
        term = output[0]
        # A missing text is treated as empty, leaving just the colon.
        term.text = (term.text or '') + ':'
        # The id is the visible label, colon included (not so cool?).
        term.set('id', term.text)
        return output

HtmlFormat.renderers.register(core.Div, 'defined', DivDefined('dt', {'style': 'display: inline-block'}))
171
172
class DivImage(NaturalText):
    """Renders an image element, resolving ``file://`` sources."""

    def render(self, element, ctx):
        """Return the rendered image with ``src`` and ``style`` set.

        A ``src`` starting with ``file://`` has that prefix replaced by
        ``ctx.files_path``. A ``files_path`` of ``None`` (the default of
        ``HtmlFormat.build``) is treated as an empty prefix instead of
        raising ``TypeError`` on string concatenation.
        """
        output = super(DivImage, self).render(element, ctx)
        src = element.attrib.get('src', '')
        if src.startswith('file://'):
            src = (ctx.files_path or '') + src[len('file://'):]
        image = output[0]
        image.attrib['src'] = src
        image.attrib['style'] = 'display: block; width: 60%; margin: 3em auto'
        return output

HtmlFormat.renderers.register(core.Div, 'img', DivImage('img'))
184
# List-related core.Div variants, each using a NaturalText renderer built
# with the HTML tag name shown (presumably the output tag -- confirm against
# TreeRenderer).
HtmlFormat.renderers.register(core.Div, 'item', NaturalText('li'))
HtmlFormat.renderers.register(core.Div, 'list', NaturalText('ul'))
HtmlFormat.renderers.register(core.Div, 'list.enum', NaturalText('ol'))
188
class DivListDefinitions(NaturalText):
    """Renderer for definition lists; currently identical to NaturalText."""

    def render(self, element, ctx):
        """Return the parent class's rendering unchanged."""
        # A disabled tweak used to float deeply nested lists to the right:
        #if ctx.toc_level > 2:
        #    output[0].attrib['style'] = 'float: right'
        return super(DivListDefinitions, self).render(element, ctx)

HtmlFormat.renderers.register(core.Div, 'list.definitions', DivListDefinitions('ul'))
HtmlFormat.renderers.register(core.Div, 'p', NaturalText('p'))
198
199
class Section(NaturalText):
    """Renders a section, registering it in the TOC and giving it an id."""

    def subcontext(self, element, ctx):
        """Return a child context one TOC level deeper than ``ctx``."""
        deeper_level = ctx.toc_level + 1
        return Context(ctx, toc_level=deeper_level)

    def render(self, element, ctx):
        """Return the rendered section with id ``sectN``.

        ``N`` is the number handed out by ``ctx.toc.add`` for this section's
        title at the current TOC level.
        """
        section_number = ctx.toc.add(element.meta.title(), ctx.toc_level)
        rendered = super(Section, self).render(element, ctx)
        rendered[0].set("id", "sect%d" % section_number)
        return rendered
HtmlFormat.renderers.register(core.Section, None, Section('section'))
210
211
# Inline core.Span variants. The string passed to each renderer is presumably
# the HTML tag it emits -- confirm against TreeRenderer.
HtmlFormat.renderers.register(core.Span, None, NaturalText('span'))
HtmlFormat.renderers.register(core.Span, 'cite', NaturalText('cite'))
# Code citations use LiteralText, which skips NaturalText's nbsp insertion.
HtmlFormat.renderers.register(core.Span, 'cite.code', LiteralText('code'))
HtmlFormat.renderers.register(core.Span, 'emph', NaturalText('em'))
HtmlFormat.renderers.register(core.Span, 'emp', NaturalText('strong'))
217
class SpanUri(LiteralText):
    """Renders a URI span as a link whose target is the span's own text."""

    def render(self, element, ctx):
        """Return the rendered link with ``href`` set to ``element.text``.

        An element without text yields an empty ``href`` rather than passing
        ``None`` to lxml, which rejects it as an attribute value.
        """
        root = super(SpanUri, self).render(element, ctx)
        root[0].attrib['href'] = element.text or ''
        return root
HtmlFormat.renderers.register(core.Span, 'uri', SpanUri('a'))
224
class SpanLink(LiteralText):
    """Renders a link span, resolving ``file://`` targets."""

    def render(self, element, ctx):
        """Return the rendered link with its ``href`` attribute set.

        An ``href`` starting with ``file://`` has that prefix replaced by
        ``ctx.files_path``. A ``files_path`` of ``None`` (the default of
        ``HtmlFormat.build``) is treated as an empty prefix instead of
        raising ``TypeError`` on string concatenation.
        """
        root = super(SpanLink, self).render(element, ctx)
        target = element.attrib.get('href', '')
        if target.startswith('file://'):
            target = (ctx.files_path or '') + target[len('file://'):]
        root[0].attrib['href'] = target
        return root
HtmlFormat.renderers.register(core.Span, 'link', SpanLink('a'))
234