5bbe76a6142fae84e4ca6b87bf792f17153d5c5a
[librarian.git] / src / librarian / builders / html.py
1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import defaultdict
5 import os
6 from urllib.request import urlopen
7 from lxml import etree
8 from librarian.html import add_table_of_contents, add_table_of_themes, add_image_sizes
9 from librarian import OutputFile
10
11
class HtmlBuilder:
    """Build an HTML tree for a document.

    ``build`` hands the builder to a document element's ``html_build``
    method, which is expected to call back into ``start_element``,
    ``push_text``, ``end_element`` and the fragment methods.  Output is
    written at the element on top of a stack of "cursors".  Named fragments
    (``header``, ``footnotes``, ``nota_red``) are side trees that
    ``postprocess`` attaches to the main tree afterwards.
    """

    file_extension = "html"

    # Feature switches.  Only with_themes, with_toc and with_nota_red are
    # read inside this class (no_externalities is read by
    # StandaloneHtmlBuilder); the remaining ones are presumably consulted by
    # the elements' html_build() implementations -- verify against callers.
    with_themes = True
    with_toc = True
    with_footnotes = True
    with_nota_red = True
    with_ids = True
    with_numbering = True
    no_externalities = False
    orphans = True

    # Tag and attributes of the root element created in __init__.
    root_tag = 'div'
    root_attrib = {'id': 'book-text'}

    def __init__(self, gallery_path=None, gallery_url=None, base_url=None):
        """Create the empty main tree and the side trees.

        :param gallery_path: directory passed to ``add_image_sizes``; when
            ``None``, ``prepare_images`` does nothing.
        :param gallery_url: URL passed to ``add_image_sizes``.
        :param base_url: explicit value for the ``base_url`` property; when
            ``None`` a default derived from the document is used.
        """
        self._base_url = base_url
        self.gallery_path = gallery_path
        self.gallery_url = gallery_url

        self.tree = text = etree.Element(self.root_tag, **self.root_attrib)
        self.header = etree.Element('h1')

        self.footnotes = etree.Element('div', id='footnotes')

        self.nota_red = etree.Element('div', id='nota_red')

        # Fragment name -> element that receives output while the fragment
        # is entered.  ``None`` names the main text.
        self.cursors = {
            None: text,
            'header': self.header,
            'footnotes': self.footnotes,
            'nota_red': self.nota_red,
        }
        # Stack of output positions; the last item is the active one.
        self.current_cursors = [text]

    @property
    def base_url(self):
        """Return the base URL given to the constructor, or a default.

        The default is built from ``self.document.meta.url.slug``, so it is
        only available once ``build`` has set ``self.document``.
        """
        if self._base_url is not None:
            return self._base_url
        else:
            return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)

    @property
    def cursor(self):
        """Return the element currently receiving output."""
        return self.current_cursors[-1]

    def enter_fragment(self, fragment):
        """Redirect output to the named fragment until ``exit_fragment``.

        An unknown name pushes the current cursor again, so the matching
        ``exit_fragment`` call still keeps the stack balanced.
        """
        cursor = self.cursors.get(fragment, self.cursor)
        self.current_cursors.append(cursor)

    def exit_fragment(self):
        """Undo the most recent ``enter_fragment``."""
        self.current_cursors.pop()

    def create_fragment(self, name, element):
        """Register ``element`` as the target of a new fragment ``name``."""
        assert name not in self.cursors
        self.cursors[name] = element

    def forget_fragment(self, name):
        """Remove a fragment registration; raises ``KeyError`` if unknown."""
        del self.cursors[name]

    def build(self, document, element=None, **kwargs):
        """Render ``document`` (or just ``element``) and return the output.

        :param document: the document to render; stored as ``self.document``.
        :param element: element to render; defaults to the document root.
        :param kwargs: accepted and ignored here.
        :return: whatever ``output`` returns.
        """
        self.document = document
        self.document.assign_ids()
        self.prepare_images()

        if element is None:
            element = document.tree.getroot()

        element.html_build(self)
        self.postprocess(document)
        return self.output()

    def prepare_images(self):
        """Ensure the gallery directory exists and run ``add_image_sizes``.

        Does nothing when no ``gallery_path`` was configured.
        """
        # Temporarily use the legacy method, before transitioning to external generators.
        if self.gallery_path is None:
            return
        # The directory may already exist from an earlier run; any other
        # failure (e.g. permissions) is reported instead of being swallowed.
        os.makedirs(self.gallery_path, exist_ok=True)
        add_image_sizes(self.document.tree, self.gallery_path, self.gallery_url, self.base_url)

    def output(self):
        """Serialize the tree as UTF-8 HTML wrapped in an ``OutputFile``.

        :return: ``None`` when the root element has no children.
        """
        if not len(self.tree):
            return None
        return OutputFile.from_bytes(
            etree.tostring(
                self.tree,
                method='html',
                encoding='utf-8',
                pretty_print=True
            )
        )

    def postprocess(self, document):
        """Attach header, editorial note, tables and footnotes to the tree."""
        _ = document.tree.getroot().gettext

        if document.meta.translators:
            self.enter_fragment('header')
            self.start_element('span', {'class': 'translator'})
            self.push_text(_("translated by") + " ")
            self.push_text(
                ", ".join(
                    translator.readable()
                    for translator in document.meta.translators
                )
            )
            # Close the span before leaving the fragment; otherwise the
            # header would be left behind on the cursor stack.
            self.end_element()
            self.exit_fragment()

        if len(self.header):
            self.tree.insert(0, self.header)

        if self.with_nota_red and len(self.nota_red):
            self.tree.append(self.nota_red)
        if self.with_themes:
            add_table_of_themes(self.tree)
        if self.with_toc:
            add_table_of_contents(self.tree)

        # NOTE(review): the counter presumably starts at 1, making "> 1"
        # mean "at least one footnote" -- confirm against the document class.
        if self.document.counters['fn'] > 1:
            fnheader = etree.Element("h3")
            fnheader.text = _("Footnotes")
            self.footnotes.insert(0, fnheader)
            self.tree.append(self.footnotes)

    def start_element(self, tag, attrib=None):
        """Append a new child ``tag`` at the cursor and descend into it.

        :param attrib: optional mapping of attribute names to values.
        """
        self.current_cursors.append(etree.SubElement(
            self.cursor,
            tag,
            **(attrib or {})
        ))

    def end_element(self):
        """Leave the element opened by the matching ``start_element``."""
        self.current_cursors.pop()

    def push_text(self, text):
        """Append ``text`` at the end of the current cursor's content."""
        cursor = self.cursor
        if len(cursor):
            # Text after the last child lives in that child's tail.
            cursor[-1].tail = (cursor[-1].tail or '') + text
        else:
            cursor.text = (cursor.text or '') + text

    def add_visible_number(self, element):
        """Emit a ``wl-num`` link showing the element's visible number.

        The link targets the element's ``_id`` attribute and displays its
        ``_visible_numbering`` attribute.
        """
        assert '_id' in element.attrib, etree.tostring(element)
        self.start_element('a', {
            'href': f'#{element.attrib["_id"]}',
            'class': 'wl-num',
        })
        self.push_text(element.attrib['_visible_numbering'])
        self.end_element()
161
162
class StandaloneHtmlBuilder(HtmlBuilder):
    """Builder that wraps the book text in a complete HTML page.

    After the regular post-processing, the tree is placed inside
    ``<html><body>`` and a ``<head>`` with charset, title, viewport and
    stylesheet is added.
    """

    css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css"

    def postprocess(self, document):
        """Run the base post-processing, then build the page around it.

        With ``no_externalities`` set, the stylesheet at ``css_url`` is
        downloaded and inlined in a ``<style>`` element and no scripts are
        added; otherwise the stylesheet is linked and two script elements
        are appended to the body.
        """
        super().postprocess(document)

        tree = etree.Element('html')
        body = etree.SubElement(tree, 'body')
        body.append(self.tree)
        self.tree = tree

        head = etree.Element('head')
        tree.insert(0, head)

        etree.SubElement(head, 'meta', charset='utf-8')
        etree.SubElement(head, 'title').text = document.meta.title

        etree.SubElement(
            head,
            'meta',
            name="viewport",
            content="width=device-width, initial-scale=1, maximum-scale=1"
        )

        if self.no_externalities:
            # Fetch first so that a failed download leaves no empty <style>,
            # and close the HTTP response as soon as it has been read.
            with urlopen(self.css_url) as response:
                stylesheet = response.read().decode('utf-8')
            etree.SubElement(
                head, 'style',
            ).text = stylesheet
        else:
            etree.SubElement(
                head,
                'link',
                href=self.css_url,
                rel="stylesheet",
                type="text/css",
            )

            etree.SubElement(
                body, 'script',
                src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
            )

            # NOTE(review): this script is referenced over plain http, which
            # browsers generally block as mixed content on https pages --
            # consider switching to https after confirming the host serves it.
            etree.SubElement(
                body,
                "script",
                src="http://malsup.github.io/min/jquery.cycle2.min.js"
            )
210
211
class SnippetHtmlBuilder(HtmlBuilder):
    """HtmlBuilder variant with the optional features switched off.

    Differs from the base class only in the flag values below.
    """

    # Tables added during post-processing.
    with_toc = False
    with_themes = False

    # Side fragments.
    with_nota_red = False
    with_footnotes = False

    # Identifiers and visible numbering.
    with_numbering = False
    with_ids = False
219
220
class AbstraktHtmlBuilder(HtmlBuilder):
    """Builder that renders only the document's ``abstrakt`` element.

    The output root is a bare ``<blockquote>`` and the optional features of
    the base builder are switched off.
    """

    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    with_ids = False
    with_numbering = False

    root_tag = 'blockquote'
    root_attrib = {}

    def build(self, document, element=None, **kwargs):
        """Render ``element``, defaulting to the first ``abstrakt`` found.

        :param document: the document to render.
        :param element: element to render; when ``None`` the first
            ``abstrakt`` descendant of the document root is used.
        :return: an empty ``OutputFile`` when there is no such element,
            otherwise the result of the base ``build``.
        """
        if element is None:
            # A path starting with "/" on an ElementTree is rewritten to
            # "./..." with a FutureWarning; spell out the relative form.
            element = document.tree.find('.//abstrakt')
        if element is None:
            return OutputFile.from_bytes(b'')
        # Marks the element before rendering; presumably read by its
        # html_build() -- verify against the element class.
        element.attrib['_force'] = '1'
        return super().build(document, element, **kwargs)
239
240             
class DaisyHtmlBuilder(StandaloneHtmlBuilder):
    """Standalone builder that serializes the page as XHTML.

    The stylesheet is inlined (``no_externalities``) and the output carries
    an XML declaration plus the XHTML 1.0 Transitional doctype.
    """

    file_extension = 'xhtml'
    no_externalities = True

    with_toc = False
    with_themes = False
    with_nota_red = False
    with_footnotes = False
    with_numbering = False
    with_deep_identifiers = False

    def output(self):
        """Return the tree serialized as UTF-8 XML in an ``OutputFile``."""
        page = etree.ElementTree(self.tree)

        # Attach the doctype identifiers to the document before serializing.
        doctype = page.docinfo
        doctype.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN'
        doctype.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'

        serialized = etree.tostring(
            page,
            encoding='utf-8',
            pretty_print=True,
            xml_declaration=True,
        )
        return OutputFile.from_bytes(serialized)
263