f2177ae33d8142c1a2e70a5b70584f4a90e0344d
[librarian.git] / src / librarian / builders / html.py
1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import defaultdict
5 import os
6 from urllib.request import urlopen
7 from lxml import etree
8 from librarian.html import add_table_of_contents, add_table_of_themes, add_image_sizes
9 from librarian import OutputFile
10
11
class HtmlBuilder:
    """Build an HTML element tree for a document.

    The builder keeps a stack of "cursors" -- the elements currently being
    written into.  ``build`` calls ``element.html_build(self)``, which is
    expected to drive the builder through ``start_element``,
    ``end_element``, ``push_text`` and the fragment methods.

    Named fragments ('header', 'footnotes', 'nota_red', plus any added with
    ``create_fragment``) are separate containers that content can be
    redirected into; ``postprocess`` attaches the built-in ones to the
    main tree.
    """

    file_extension = "html"

    # Feature switches.  ``with_themes``, ``with_toc`` and ``with_nota_red``
    # are read by ``postprocess``; the remaining flags are not read in this
    # class (presumably consulted by element classes or subclasses).
    with_themes = True
    with_toc = True
    with_footnotes = True
    with_nota_red = True
    with_ids = True
    with_numbering = True
    no_externalities = False
    orphans = True

    # Tag and attributes of the root element of the produced tree.
    root_tag = 'div'
    root_attrib = {'id': 'book-text'}

    def __init__(self, gallery_path=None, gallery_url=None, base_url=None):
        """Create an empty output tree and the built-in fragments.

        :param gallery_path: directory passed to ``add_image_sizes``;
            when ``None``, image preparation is skipped.
        :param gallery_url: URL passed to ``add_image_sizes`` together
            with ``gallery_path``.
        :param base_url: explicit value for ``base_url``; when ``None``
            the URL is derived from the document's slug.
        """
        self._base_url = base_url
        self.gallery_path = gallery_path
        self.gallery_url = gallery_url

        self.tree = text = etree.Element(self.root_tag, **self.root_attrib)
        self.header = etree.Element('h1')

        self.footnotes = etree.Element('div', id='footnotes')
        # Every counter starts at 1, so an untouched counter reads as 1.
        self.counters = defaultdict(lambda: 1)

        self.nota_red = etree.Element('div', id='nota_red')

        # Fragment name -> container element; ``None`` is the main text.
        self.cursors = {
            None: text,
            'header': self.header,
            'footnotes': self.footnotes,
            'nota_red': self.nota_red,
        }
        # Stack of elements being written into; the top one is ``cursor``.
        self.current_cursors = [text]

    @property
    def base_url(self):
        """Base URL handed to ``add_image_sizes``.

        Returns the value given to the constructor if there was one;
        otherwise builds it from ``self.document.meta.url.slug``, which
        requires ``build`` to have set ``self.document`` first.
        """
        if self._base_url is not None:
            return self._base_url
        return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(
            self.document.meta.url.slug
        )

    @property
    def cursor(self):
        """The element currently being written into (top of the stack)."""
        return self.current_cursors[-1]

    def enter_fragment(self, fragment):
        """Redirect output into the named fragment.

        An unknown fragment name leaves output where it is: the current
        cursor is pushed again, so a matching ``exit_fragment`` is still
        required.
        """
        cursor = self.cursors.get(fragment, self.cursor)
        self.current_cursors.append(cursor)

    def exit_fragment(self):
        """Leave the fragment entered by the matching ``enter_fragment``."""
        self.current_cursors.pop()

    def create_fragment(self, name, element):
        """Register ``element`` as the container for fragment ``name``.

        The name must not be registered already.
        """
        assert name not in self.cursors
        self.cursors[name] = element

    def forget_fragment(self, name):
        """Unregister fragment ``name``; raises ``KeyError`` if unknown."""
        del self.cursors[name]

    def build(self, document, element=None, **kwargs):
        """Build ``document`` (or just ``element``) and return the output.

        :param document: the document to build; stored as
            ``self.document``.
        :param element: element to build from; defaults to the root of
            ``document.tree``.
        :param kwargs: accepted but not used here.
        :returns: whatever ``output`` returns.
        """
        self.document = document

        self.assign_ids(self.document.tree)
        self.prepare_images()

        if element is None:
            element = document.tree.getroot()

        element.html_build(self)
        self.postprocess(document)
        return self.output()

    def assign_ids(self, tree):
        """Call ``assign_id`` on every element with a truthy ``NUMBERING``."""
        # Assign IDs depth-first, to account for any <numeracja> inside.
        for _event, elem in etree.iterwalk(tree, events=('end',)):
            if getattr(elem, 'NUMBERING', None):
                elem.assign_id(self)

    def prepare_images(self):
        """Run ``add_image_sizes`` on the document if a gallery is set."""
        # Temporarily use the legacy method, before transitioning to
        # external generators.
        if self.gallery_path is None:
            return
        try:
            os.makedirs(self.gallery_path)
        except OSError:
            # Best-effort: errors creating the directory (such as it
            # already existing) are ignored.
            pass
        add_image_sizes(
            self.document.tree,
            self.gallery_path,
            self.gallery_url,
            self.base_url,
        )

    def output(self):
        """Serialise the tree to an ``OutputFile``.

        Returns ``None`` when the root element has no child elements.
        """
        if not len(self.tree):
            return None
        return OutputFile.from_bytes(
            etree.tostring(
                self.tree,
                method='html',
                encoding='utf-8',
                pretty_print=True
            )
        )

    def postprocess(self, document):
        """Attach header, editorial note, indexes and footnotes to the tree.

        Called by ``build`` after the document elements have been
        rendered.
        """
        _ = document.tree.getroot().gettext

        if document.meta.translators:
            self.enter_fragment('header')
            self.start_element('span', {'class': 'translator'})
            self.push_text(_("translated by") + " ")
            self.push_text(
                ", ".join(
                    translator.readable()
                    for translator in document.meta.translators
                )
            )
            # Close the span before leaving the fragment, so that the
            # cursor stack is back where it was before this branch.
            self.end_element()
            self.exit_fragment()

        # Only a header that has child elements is inserted.
        if len(self.header):
            self.tree.insert(0, self.header)

        if self.with_nota_red and len(self.nota_red):
            self.tree.append(self.nota_red)
        if self.with_themes:
            add_table_of_themes(self.tree)
        if self.with_toc:
            add_table_of_contents(self.tree)

        # The 'fn' counter starts at 1; a higher value presumably means
        # at least one footnote was numbered during the build.
        if self.counters['fn'] > 1:
            fnheader = etree.Element("h3")
            fnheader.text = _("Footnotes")
            self.footnotes.insert(0, fnheader)
            self.tree.append(self.footnotes)

    def start_element(self, tag, attrib=None):
        """Open a new child of the current cursor and make it the cursor.

        :param tag: tag name of the new element.
        :param attrib: optional dict of attributes for the new element.
        """
        self.current_cursors.append(etree.SubElement(
            self.cursor,
            tag,
            **(attrib or {})
        ))

    def end_element(self):
        """Close the element opened by the matching ``start_element``."""
        self.current_cursors.pop()

    def push_text(self, text):
        """Append ``text`` at the end of the current cursor's content."""
        cursor = self.cursor
        if len(cursor):
            # Text following child elements lives in the last child's tail.
            cursor[-1].tail = (cursor[-1].tail or '') + text
        else:
            cursor.text = (cursor.text or '') + text

    def add_visible_number(self, element):
        """Emit an ``<a class="wl-num">`` link showing the element's number.

        Reads the ``_id`` and ``_visible_numbering`` attributes of
        ``element``; ``_id`` must be present.
        """
        assert '_id' in element.attrib, etree.tostring(element)
        self.start_element('a', {
            'href': f'#{element.attrib["_id"]}',
            'class': 'wl-num',
        })
        self.push_text(element.attrib['_visible_numbering'])
        self.end_element()
169
170
class StandaloneHtmlBuilder(HtmlBuilder):
    """HtmlBuilder variant producing a complete ``<html>`` document.

    The tree built by the parent class is wrapped in ``<html><body>`` and
    a ``<head>`` with charset, title, viewport and stylesheet is added.
    """

    # Stylesheet that is either linked or, with ``no_externalities``,
    # downloaded and inlined.
    css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css"

    def postprocess(self, document):
        """Run the parent postprocessing, then wrap the result in a page.

        With ``no_externalities`` set, the stylesheet is fetched from
        ``css_url`` and embedded in a ``<style>`` element, and no scripts
        are added.  Otherwise the stylesheet is linked and two external
        ``<script>`` elements are appended to the body.
        """
        super().postprocess(document)

        tree = etree.Element('html')
        body = etree.SubElement(tree, 'body')
        body.append(self.tree)
        self.tree = tree

        head = etree.Element('head')
        tree.insert(0, head)

        etree.SubElement(head, 'meta', charset='utf-8')
        etree.SubElement(head, 'title').text = document.meta.title

        etree.SubElement(
            head,
            'meta',
            name="viewport",
            content="width=device-width, initial-scale=1, maximum-scale=1"
        )

        if self.no_externalities:
            # Close the HTTP response deterministically instead of
            # leaving it to the garbage collector.
            with urlopen(self.css_url) as response:
                css = response.read().decode('utf-8')
            etree.SubElement(head, 'style').text = css
        else:
            etree.SubElement(
                head,
                'link',
                href=self.css_url,
                rel="stylesheet",
                type="text/css",
            )

            etree.SubElement(
                body, 'script',
                src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
            )

            etree.SubElement(
                body,
                "script",
                src="http://malsup.github.io/min/jquery.cycle2.min.js"
            )
218
219
class SnippetHtmlBuilder(HtmlBuilder):
    """HtmlBuilder with themes, TOC, footnotes, editorial note, ids and
    numbering all switched off."""

    # Every optional feature flag that this class overrides is disabled.
    with_themes = with_toc = with_footnotes = False
    with_nota_red = with_ids = with_numbering = False
227
228
class AbstraktHtmlBuilder(HtmlBuilder):
    """HtmlBuilder that renders a document's ``<abstrakt>`` element.

    The output root is a bare ``<blockquote>`` and the optional features
    (themes, TOC, footnotes, editorial note, ids, numbering) are off.
    """

    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    with_ids = False
    with_numbering = False

    root_tag = 'blockquote'
    root_attrib = {}

    def build(self, document, element=None, **kwargs):
        """Build ``element``, defaulting to the first ``<abstrakt>``.

        The element is marked with ``_force="1"`` before being handed to
        the parent ``build``.  If ``element`` is not given and the
        document contains no ``<abstrakt>``, the lookup yields ``None``
        and setting the attribute raises ``AttributeError``.
        """
        if element is None:
            # Explicit relative path: an absolute '//abstrakt' on an
            # ElementTree relies on the library silently rewriting it to
            # this same descendant search.
            element = document.tree.find('.//abstrakt')
        element.attrib['_force'] = '1'
        return super().build(document, element, **kwargs)
245
246             
class DaisyHtmlBuilder(StandaloneHtmlBuilder):
    """Standalone builder whose output carries an XHTML 1.0 Transitional
    doctype and an XML declaration."""

    file_extension = 'xhtml'

    # Stylesheet is inlined rather than linked (see the parent class).
    no_externalities = True

    # Optional features switched off for this output.
    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    with_numbering = False
    with_deep_identifiers = False

    def output(self):
        """Serialise the tree, with doctype and XML declaration, to an
        ``OutputFile``."""
        xhtml_doc = etree.ElementTree(self.tree)

        docinfo = xhtml_doc.docinfo
        docinfo.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN'
        docinfo.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'

        serialized = etree.tostring(
            xhtml_doc,
            encoding='utf-8',
            pretty_print=True,
            xml_declaration=True,
        )
        return OutputFile.from_bytes(serialized)
269