Some prelim work on builder api.
[librarian.git] / src / librarian / builders / html.py
1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from collections import defaultdict
5 import os
6 from urllib.request import urlopen
7 from lxml import etree
8 from librarian.html import add_table_of_contents, add_table_of_themes, add_image_sizes
9 from librarian import OutputFile
10
11
class TreeBuilder:
    """Cursor-stack helper for building an element tree step by step.

    This class defines no ``__init__``.  Its methods rely on two attributes
    that have to be provided by whoever instantiates or subclasses it:

    * ``self.cursors`` -- mapping from fragment name to the element that
      receives content while that fragment is entered;
    * ``self.current_cursors`` -- list used as a stack of elements; its
      last item is the current insertion point.
    """

    @property
    def cursor(self):
        """Return the element on top of the cursor stack."""
        return self.current_cursors[-1]

    def enter_fragment(self, fragment):
        """Push the element registered for *fragment* onto the stack.

        When no element is registered under that name, the current cursor
        is pushed again, so the call can still be paired with
        ``exit_fragment``.
        """
        target = self.cursors.get(fragment, self.cursor)
        self.current_cursors.append(target)

    def exit_fragment(self):
        """Pop the cursor pushed by the matching ``enter_fragment``."""
        self.current_cursors.pop()

    def create_fragment(self, name, element):
        """Register *element* as the target of the fragment called *name*.

        Registering a name twice trips the assertion below.
        """
        assert name not in self.cursors
        self.cursors[name] = element

    def forget_fragment(self, name):
        """Unregister fragment *name*; raises ``KeyError`` if it is unknown."""
        del self.cursors[name]

    def start_element(self, tag, attrib=None):
        """Append a new *tag* child to the current cursor and descend into it.

        *attrib* is an optional mapping of attribute names to values.  It is
        handed over as the attribute mapping itself rather than splatted
        into keyword arguments, so attribute names that happen to match
        ``SubElement``'s own parameters (``attrib``, ``nsmap``) are still
        treated as plain attributes.
        """
        attributes = dict(attrib) if attrib else {}
        child = etree.SubElement(self.cursor, tag, attributes)
        self.current_cursors.append(child)

    def end_element(self):
        """Pop the element pushed by the matching ``start_element``."""
        self.current_cursors.pop()

    def push_text(self, text):
        """Append *text* at the end of the current cursor's content.

        If the cursor already has child elements, the text is added to the
        tail of the last child; otherwise it is added to the cursor's own
        text.
        """
        cursor = self.cursor
        if len(cursor):
            last_child = cursor[-1]
            last_child.tail = (last_child.tail or '') + text
        else:
            cursor.text = (cursor.text or '') + text
47
48
class HtmlBuilder(TreeBuilder):
    """Build an HTML element tree for a document and serialize it.

    The builder keeps a main tree plus three side elements (header,
    footnotes, ``nota_red``) registered as fragments.  Element build methods
    write into them through the cursor API; ``postprocess`` then stitches
    the side elements into the main tree.
    """

    # Name of the per-element build method.  Note that ``build`` below calls
    # ``element.html_build`` directly.
    build_method_fn = 'html_build'
    file_extension = "html"
    # Run add_table_of_themes() / add_table_of_contents() in postprocess().
    with_themes = True
    with_toc = True
    # Not read in this class -- presumably consulted by the element build
    # methods; confirm against them.
    with_footnotes = True
    # Append the ``nota_red`` div to the tree in postprocess() when non-empty.
    with_nota_red = True
    # Not read in this class -- presumably consulted by the element build
    # methods; confirm against them.
    with_ids = True
    with_numbering = True
    # Read by StandaloneHtmlBuilder.postprocess to choose between inlining
    # the stylesheet and linking external resources.
    no_externalities = False
    # Not read in this class -- TODO confirm where it is used.
    orphans = True

    root_tag = 'div'
    root_attrib = {'id': 'book-text'}

    def __init__(self, gallery_path=None, gallery_url=None, base_url=None):
        """Create the empty output tree and register the fragments.

        :param gallery_path: directory passed to ``add_image_sizes``; when
            ``None``, ``prepare_images`` does nothing.
        :param gallery_url: passed through to ``add_image_sizes``.
        :param base_url: explicit value for ``base_url``; when ``None`` the
            property derives one from the document's slug.
        """
        self._base_url = base_url
        self.gallery_path = gallery_path
        self.gallery_url = gallery_url

        root = etree.Element(self.root_tag, **self.root_attrib)
        self.tree = root
        self.header = etree.Element('h1')
        self.footnotes = etree.Element('div', id='footnotes')
        self.nota_red = etree.Element('div', id='nota_red')

        # The ``None`` key maps to the main text tree.
        self.cursors = {
            None: root,
            'header': self.header,
            'footnotes': self.footnotes,
            'nota_red': self.nota_red,
        }
        self.current_cursors = [root]

    @property
    def base_url(self):
        """Return the explicit base URL, or one built from the document slug.

        The fallback reads ``self.document``, which is only set by ``build``.
        """
        if self._base_url is not None:
            return self._base_url
        return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(
            self.document.meta.url.slug
        )

    def build(self, document, element=None, **kwargs):
        """Build *document* (or just *element*) and return the output.

        Assigns ids on the document, prepares images, runs the element's
        ``html_build`` with this builder, post-processes the tree and
        returns whatever ``output`` returns.  *element* defaults to the root
        of ``document.tree``.  Extra keyword arguments are accepted and
        ignored.
        """
        self.document = document
        self.document.assign_ids()
        self.prepare_images()

        if element is None:
            element = document.tree.getroot()

        element.html_build(self)
        self.postprocess(document)
        return self.output()

    def prepare_images(self):
        """Run ``add_image_sizes`` on the document when a gallery path is set."""
        # Temporarily use the legacy method, before transitioning to external generators.
        if self.gallery_path is None:
            return
        try:
            os.makedirs(self.gallery_path)
        except OSError:
            # Deliberately best-effort: any OSError from creating the
            # directory (e.g. it already exists) is ignored here.
            pass
        add_image_sizes(
            self.document.tree,
            self.gallery_path,
            self.gallery_url,
            self.base_url,
        )

    def output(self):
        """Serialize the tree as UTF-8 HTML wrapped in an ``OutputFile``.

        Returns ``None`` when the tree has no child elements.
        """
        if not len(self.tree):
            return None
        return OutputFile.from_bytes(
            etree.tostring(
                self.tree,
                method='html',
                encoding='utf-8',
                pretty_print=True
            )
        )

    def postprocess(self, document):
        """Assemble header, notes, tables and footnotes into the main tree."""
        _ = document.tree.getroot().gettext

        if document.meta.translators:
            self.enter_fragment('header')
            self.start_element('span', {'class': 'translator'})
            self.push_text(_("translated by") + " ")
            self.push_text(
                ", ".join(
                    translator.readable()
                    for translator in document.meta.translators
                )
            )
            # Close the span before leaving the fragment.  Without this the
            # single pop in exit_fragment() removes only the span and the
            # header element stays on the cursor stack.
            self.end_element()
            self.exit_fragment()

        # len() counts child elements: a header holding only text is skipped.
        if len(self.header):
            self.tree.insert(0, self.header)

        if self.with_nota_red and len(self.nota_red):
            self.tree.append(self.nota_red)
        if self.with_themes:
            add_table_of_themes(self.tree)
        if self.with_toc:
            add_table_of_contents(self.tree)

        if len(self.footnotes):
            fnheader = etree.Element("h3")
            fnheader.text = _("Footnotes")
            self.footnotes.insert(0, fnheader)
            self.tree.append(self.footnotes)

    def add_visible_number(self, element):
        """Emit an ``<a class="wl-num">`` link showing the element's number.

        The link targets ``#<_id>`` and its text is the element's
        ``_visible_numbering`` attribute.  The element must carry an ``_id``
        attribute.
        """
        assert '_id' in element.attrib, etree.tostring(element)
        self.start_element('a', {
            'href': f'#{element.attrib["_id"]}',
            'class': 'wl-num',
        })
        self.push_text(element.attrib['_visible_numbering'])
        self.end_element()
164
165
class StandaloneHtmlBuilder(HtmlBuilder):
    """HtmlBuilder variant that wraps the result in a full HTML document."""

    css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css"

    def postprocess(self, document):
        """Run the base post-processing, then add ``<html>``/``<head>``/``<body>``.

        The previously built tree becomes the content of ``<body>`` and
        ``self.tree`` is replaced by the new ``<html>`` root.  With
        ``no_externalities`` set, the stylesheet is downloaded from
        ``css_url`` and inlined in a ``<style>`` element and no scripts are
        added; otherwise the stylesheet is linked and two scripts are
        appended to ``<body>``.
        """
        super().postprocess(document)

        tree = etree.Element('html')
        body = etree.SubElement(tree, 'body')
        body.append(self.tree)
        self.tree = tree

        head = etree.Element('head')
        tree.insert(0, head)

        etree.SubElement(head, 'meta', charset='utf-8')
        etree.SubElement(head, 'title').text = document.meta.title

        etree.SubElement(
            head,
            'meta',
            name="viewport",
            content="width=device-width, initial-scale=1, maximum-scale=1"
        )

        if self.no_externalities:
            # Fetch first and close the response deterministically; the
            # <style> element is only created once the download succeeded.
            with urlopen(self.css_url) as response:
                css = response.read().decode('utf-8')
            etree.SubElement(head, 'style').text = css
        else:
            etree.SubElement(
                head,
                'link',
                href=self.css_url,
                rel="stylesheet",
                type="text/css",
            )

            etree.SubElement(
                body, 'script',
                src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
            )

            # Loaded over HTTPS: a plain-HTTP script is blocked as mixed
            # content when the page itself is served over HTTPS.
            etree.SubElement(
                body,
                "script",
                src="https://malsup.github.io/min/jquery.cycle2.min.js"
            )
213
214
class SnippetHtmlBuilder(HtmlBuilder):
    """HtmlBuilder variant with six of the feature flags switched off.

    ``orphans`` and ``no_externalities`` keep their inherited values.
    """

    # No table of themes and no table of contents in postprocess().
    with_themes = with_toc = False
    # No ``nota_red`` div appended in postprocess(); ``with_footnotes`` is
    # presumably honoured by the element build methods -- confirm there.
    with_footnotes = with_nota_red = False
    # Presumably honoured by the element build methods -- confirm there.
    with_ids = with_numbering = False
222
223
class AbstraktHtmlBuilder(HtmlBuilder):
    """Build only the document's ``abstrakt`` element, as a ``<blockquote>``."""

    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    with_ids = False
    with_numbering = False

    root_tag = 'blockquote'
    root_attrib = {}

    def build(self, document, element=None, **kwargs):
        """Build *element*, defaulting to the first ``abstrakt`` in the document.

        Returns an empty ``OutputFile`` when no element is given and the
        document contains no ``abstrakt`` element.  Otherwise the element's
        ``_force`` attribute is set to ``'1'`` (NOTE(review): presumably read
        by the element's build method -- confirm) and the regular build runs.
        """
        if element is None:
            # Relative descendant search.  The previous absolute form
            # ('//abstrakt') is rewritten by lxml's ElementTree.find to this
            # same path, but with a FutureWarning on every call.
            element = document.tree.find('.//abstrakt')
        if element is None:
            return OutputFile.from_bytes(b'')
        element.attrib['_force'] = '1'
        return super().build(document, element, **kwargs)
242
243             
class DaisyHtmlBuilder(StandaloneHtmlBuilder):
    """Standalone builder that serializes as XHTML 1.0 Transitional."""

    file_extension = 'xhtml'
    # Inline the stylesheet and skip external scripts.
    no_externalities = True

    with_themes = with_toc = False
    with_footnotes = with_nota_red = False
    with_numbering = False
    # NOTE(review): this flag is not read anywhere in this module, and the
    # base class defines ``with_ids`` instead -- confirm it is still in use.
    with_deep_identifiers = False

    def output(self):
        """Serialize the tree as XML with a declaration and an XHTML doctype."""
        xhtml = etree.ElementTree(self.tree)
        xhtml.docinfo.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN'
        xhtml.docinfo.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        serialized = etree.tostring(
            xhtml,
            encoding='utf-8',
            pretty_print=True,
            xml_declaration=True
        )
        return OutputFile.from_bytes(serialized)
266