Basic biblical tools.
[librarian.git] / src / librarian / builders / html.py
1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import defaultdict
5 import os
6 from urllib.request import urlopen
7 from lxml import etree
8 from librarian.html import add_table_of_contents, add_table_of_themes, add_image_sizes
9 from librarian import OutputFile
10
11
class TreeBuilder:
    """Build an element tree incrementally through a stack of cursors.

    This class defines no ``__init__``.  It reads two attributes that must
    be supplied by whoever instantiates it (e.g. a subclass):

    * ``self.cursors`` -- mapping of fragment name to element,
    * ``self.current_cursors`` -- list used as a stack; its last item is the
      element that currently receives new children and text.
    """

    @property
    def cursor(self):
        """Return the element on top of the cursor stack."""
        stack = self.current_cursors
        return stack[-1]

    def enter_fragment(self, fragment):
        """Push the element registered as *fragment* onto the cursor stack.

        If no such fragment is registered, the current cursor is pushed
        again, so a matching ``exit_fragment`` stays balanced.
        """
        fallback = self.cursor
        target = self.cursors.get(fragment, fallback)
        self.current_cursors.append(target)

    def exit_fragment(self):
        """Pop the top of the cursor stack."""
        self.current_cursors.pop()

    def create_fragment(self, name, element):
        """Register *element* under *name*; the name must not be taken."""
        assert name not in self.cursors
        self.cursors[name] = element

    def forget_fragment(self, name):
        """Remove the fragment registered under *name*."""
        del self.cursors[name]

    def start_element(self, tag, attrib=None):
        """Append a new *tag* child to the cursor and make it the cursor.

        *attrib* is an optional mapping of attribute names to values.
        """
        attributes = attrib or {}
        child = etree.SubElement(self.cursor, tag, **attributes)
        self.current_cursors.append(child)

    def end_element(self):
        """Pop the top of the cursor stack."""
        self.current_cursors.pop()

    def push_text(self, text):
        """Append *text* at the end of the current cursor's content."""
        target = self.cursor
        if len(target) == 0:
            # No children yet: the text belongs to the element itself.
            target.text = (target.text or '') + text
            return
        # Otherwise the text follows the last child, i.e. goes to its tail.
        last_child = target[-1]
        last_child.tail = (last_child.tail or '') + text

    def simple_element(self, tag, text='', attrib=None):
        """Append a *tag* child holding only *text*; the cursor is unchanged."""
        self.start_element(tag, attrib)
        self.push_text(text)
        self.end_element()
52
53
class HtmlBuilder(TreeBuilder):
    """Render a document into an HTML fragment rooted at ``root_tag``.

    Content is collected into the main tree and into three side fragments
    (``header``, ``footnotes`` and ``nota_red``).  ``postprocess`` merges the
    side fragments into the main tree once the document has been walked.
    """

    # NOTE(review): not read in this class (``build`` calls
    # ``element.html_build`` directly) -- presumably consumed elsewhere.
    build_method_fn = 'html_build'
    file_extension = "html"

    # Feature switches.  ``with_themes``, ``with_toc`` and ``with_nota_red``
    # are checked in ``postprocess``.  The remaining ones are not read in
    # this class -- presumably by the elements' build hooks; confirm there.
    with_themes = True
    with_toc = True
    with_footnotes = True
    with_nota_red = True
    with_ids = True
    with_numbering = True
    no_externalities = False
    orphans = True

    # Tag and attributes of the element created as the root of the output.
    root_tag = 'div'
    root_attrib = {'id': 'book-text'}

    def __init__(self, gallery_path=None, gallery_url=None, base_url=None):
        """Create empty output trees and the initial cursor stack.

        :param gallery_path: directory passed to ``add_image_sizes``; when
            ``None``, ``prepare_images`` does nothing.
        :param gallery_url: URL passed to ``add_image_sizes``.
        :param base_url: overrides the default value of ``base_url``.
        """
        self._base_url = base_url
        self.gallery_path = gallery_path
        self.gallery_url = gallery_url

        self.tree = text = etree.Element(self.root_tag, **self.root_attrib)
        self.header = etree.Element('h1')

        self.footnotes = etree.Element('div', id='footnotes')

        self.nota_red = etree.Element('div', id='nota_red')

        # The ``None`` key maps to the main text tree.
        self.cursors = {
            None: text,
            'header': self.header,
            'footnotes': self.footnotes,
            'nota_red': self.nota_red,
        }
        self.current_cursors = [text]

    @property
    def base_url(self):
        """Return the explicit base URL, or one derived from the document.

        The fallback reads ``self.document``, which is only set by ``build``.
        """
        if self._base_url is not None:
            return self._base_url
        else:
            return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)

    def build(self, document, element=None, **kwargs):
        """Build *document* (or just *element*) and return the output.

        :param document: the document to render; it is stored on the builder
            and has ``assign_ids()`` called on it.
        :param element: element to start from; defaults to the document root.
        :param kwargs: accepted and ignored here.
        :return: whatever ``output()`` returns.
        """
        self.document = document
        self.document.assign_ids()
        self.prepare_images()

        if element is None:
            element = document.tree.getroot()

        element.html_build(self)
        self.postprocess(document)
        return self.output()

    def prepare_images(self):
        """Ensure the gallery directory exists and run ``add_image_sizes``.

        Does nothing when no ``gallery_path`` was given.
        """
        # Temporarily use the legacy method, before transitioning to external generators.
        if self.gallery_path is None:
            return
        try:
            os.makedirs(self.gallery_path)
        except OSError:
            # Best effort: the directory typically exists already.  Any other
            # failure is left to surface in ``add_image_sizes``.
            pass
        add_image_sizes(self.document.tree, self.gallery_path, self.gallery_url, self.base_url)

    def output(self):
        """Serialize the tree as UTF-8 HTML wrapped in an ``OutputFile``.

        Returns ``None`` when the root element has no children.
        """
        if not len(self.tree):
            return None
        return OutputFile.from_bytes(
            etree.tostring(
                self.tree,
                method='html',
                encoding='utf-8',
                pretty_print=True
            )
        )

    def postprocess(self, document):
        """Merge the side fragments and generated tables into the main tree."""
        # Translation function taken from the document's root element.
        _ = document.tree.getroot().gettext

        if document.meta.translators:
            self.enter_fragment('header')
            self.start_element('span', {'class': 'translator'})
            self.push_text(_("translated by") + " ")
            self.push_text(
                ", ".join(
                    translator.readable()
                    for translator in document.meta.translators
                )
            )
            # Close the span before leaving the fragment; without this the
            # single pop in exit_fragment() removes the span and leaves the
            # header cursor stuck on the stack.
            self.end_element()
            self.exit_fragment()

        if len(self.header):
            self.tree.insert(0, self.header)

        if self.with_nota_red and len(self.nota_red):
            self.tree.append(self.nota_red)
        if self.with_themes:
            add_table_of_themes(self.tree)
        if self.with_toc:
            add_table_of_contents(self.tree)

        if len(self.footnotes):
            fnheader = etree.Element("h3")
            fnheader.text = _("Footnotes")
            self.footnotes.insert(0, fnheader)
            self.tree.append(self.footnotes)

    def add_visible_number(self, element):
        """Emit a ``wl-num`` link showing *element*'s visible number.

        The link targets the element's ``_id`` attribute and its text is the
        element's ``_visible_numbering`` attribute.
        """
        assert '_id' in element.attrib, etree.tostring(element)
        self.start_element('a', {
            'href': f'#{element.attrib["_id"]}',
            'class': 'wl-num',
        })
        self.push_text(element.attrib['_visible_numbering'])
        self.end_element()
169
170
class StandaloneHtmlBuilder(HtmlBuilder):
    """Wrap the built fragment in a complete ``<html>`` document."""

    # Stylesheet that is either linked or, with ``no_externalities``, inlined.
    css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css"

    def postprocess(self, document):
        """Run the base postprocessing, then add ``<html>``/``<head>``/``<body>``.

        With ``no_externalities`` set, the stylesheet is downloaded and
        embedded in a ``<style>`` element and no scripts are added.
        Otherwise the stylesheet is linked and two external scripts are
        appended to the body.
        """
        super().postprocess(document)

        tree = etree.Element('html')
        body = etree.SubElement(tree, 'body')
        body.append(self.tree)
        self.tree = tree

        head = etree.Element('head')
        tree.insert(0, head)

        etree.SubElement(head, 'meta', charset='utf-8')
        etree.SubElement(head, 'title').text = document.meta.title

        etree.SubElement(
            head,
            'meta',
            name="viewport",
            content="width=device-width, initial-scale=1, maximum-scale=1"
        )

        if self.no_externalities:
            # Use the response as a context manager so the connection is
            # closed even if reading or decoding fails.
            with urlopen(self.css_url) as response:
                css = response.read().decode('utf-8')
            etree.SubElement(
                head, 'style',
            ).text = css
        else:
            etree.SubElement(
                head,
                'link',
                href=self.css_url,
                rel="stylesheet",
                type="text/css",
            )

            etree.SubElement(
                body, 'script',
                src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
            )

            # NOTE(review): this script is referenced over plain HTTP, unlike
            # the other external resources -- consider switching to HTTPS.
            etree.SubElement(
                body,
                "script",
                src="http://malsup.github.io/min/jquery.cycle2.min.js"
            )
218
219
class SnippetHtmlBuilder(HtmlBuilder):
    """``HtmlBuilder`` variant with all of the switches below turned off."""

    # Extras that the base class would add around the text.
    with_footnotes = False
    with_nota_red = False
    with_themes = False
    with_toc = False

    # Identifier and numbering switches.
    with_ids = False
    with_numbering = False
227
228
class AbstraktHtmlBuilder(HtmlBuilder):
    """Render only the document's ``abstrakt`` element, as a ``<blockquote>``."""

    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    with_ids = False
    with_numbering = False

    root_tag = 'blockquote'
    root_attrib = {}

    def build(self, document, element=None, **kwargs):
        """Build the abstract of *document*.

        :param document: the document to render.
        :param element: element to render; defaults to the first
            ``abstrakt`` element found in the document tree.
        :return: the base class's ``build`` result, or an empty
            ``OutputFile`` when no element was given or found.
        """
        if element is None:
            # Explicit relative path: an absolute '//abstrakt' on an
            # ElementTree is rewritten to this same path by lxml, but with a
            # FutureWarning on every call.
            element = document.tree.find('.//abstrakt')
        if element is None:
            return OutputFile.from_bytes(b'')
        # NOTE(review): '_force' is set on the source element itself and is
        # presumably read by the element's build hook -- confirm there.
        element.attrib['_force'] = '1'
        return super().build(document, element, **kwargs)
247
248             
class DaisyHtmlBuilder(StandaloneHtmlBuilder):
    """Standalone builder that serializes as XHTML 1.0 Transitional."""

    file_extension = 'xhtml'
    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    # NOTE(review): no other code in this file reads ``with_deep_identifiers``;
    # confirm whether it is used elsewhere.
    with_deep_identifiers = False
    no_externalities = True
    with_numbering = False

    def output(self):
        """Serialize the tree as XML with a declaration and an XHTML doctype.

        Unlike the inherited implementation, this does not return ``None``
        for an empty tree and does not use the ``html`` serialization method.
        """
        document = etree.ElementTree(self.tree)

        # Attach the doctype identifiers to the document before serializing.
        doctype = document.docinfo
        doctype.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN'
        doctype.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'

        serialized = etree.tostring(
            document,
            encoding='utf-8',
            pretty_print=True,
            xml_declaration=True,
        )
        return OutputFile.from_bytes(serialized)
271