Epub: fixes #4164, #4166
[librarian.git] / src / librarian / builders / html.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 try:
5     from urllib.request import urlopen
6 except ImportError:
7     from urllib2 import urlopen
8 from lxml import etree
9 from librarian.html import add_anchors, add_table_of_contents, add_table_of_themes
10 from librarian import OutputFile
11
12
class HtmlBuilder(object):
    """Build an HTML fragment (``<div id="book-text">``) from a document.

    The builder keeps a stack of "cursors": the element on top of the stack
    receives new child elements and text.  ``build`` asks the document's
    root element to ``html_build(self)``, which is expected to drive the
    builder through ``start_element`` / ``end_element`` / ``push_text`` and,
    for out-of-flow content, ``enter_fragment`` / ``exit_fragment``.
    """
    # Inherits from ``object`` explicitly: the module keeps Python 2 import
    # fallbacks, and ``super(...)`` calls in subclasses need a new-style
    # class there.  On Python 3 this is a no-op.

    file_extension = "html"
    # The flags below switch individual steps of ``postprocess`` on or off.
    with_anchors = True
    with_themes = True
    with_toc = True
    # Not consulted by this class itself; kept as configuration for other
    # code that receives the builder.
    with_footnotes = True
    with_nota_red = True
    # Not consulted by this class itself.
    no_externalities = False

    def __init__(self, base_url=None):
        """Create the empty output tree and the named fragments.

        :param base_url: explicit value for the ``base_url`` property;
            when ``None`` a default is derived from the built document.
        """
        self._base_url = base_url

        self.tree = text = etree.Element('div', **{'id': 'book-text'})
        self.header = etree.SubElement(text, 'h1')

        # Footnotes and nota_red are detached elements; ``postprocess``
        # decides whether they get appended to the main tree.
        self.footnotes = etree.Element('div', id='footnotes')
        self.footnote_counter = 0

        self.nota_red = etree.Element('div', id='nota_red')

        # Fragment name -> element that receives output while the fragment
        # is entered.  ``None`` names the main book text.
        self.cursors = {
            None: text,
            'header': self.header,
            'footnotes': self.footnotes,
            'nota_red': self.nota_red,
        }
        # Stack of active output targets; the last item is the current one.
        self.current_cursors = [text]

    @property
    def base_url(self):
        """Return the configured base URL, or a default built from the slug.

        The default reads ``self.document``, which is only set by ``build``.
        """
        if self._base_url is not None:
            return self._base_url
        else:
            return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)

    @property
    def cursor(self):
        """The element currently receiving output (top of the stack)."""
        return self.current_cursors[-1]

    def enter_fragment(self, fragment):
        """Redirect output to the fragment registered under ``fragment``.

        Raises ``KeyError`` if no such fragment is registered.
        """
        self.current_cursors.append(self.cursors[fragment])

    def exit_fragment(self):
        """Leave the most recently entered fragment."""
        self.current_cursors.pop()

    def create_fragment(self, name, element):
        """Register ``element`` as a new fragment called ``name``."""
        assert name not in self.cursors
        self.cursors[name] = element

    def forget_fragment(self, name):
        """Unregister the fragment called ``name``."""
        del self.cursors[name]

    def preprocess(self, document):
        """Let the document assign its ids before building starts."""
        document._compat_assign_ordered_ids()
        document._compat_assign_section_ids()

    def build(self, document, **kwargs):
        """Build ``document`` and return the result of ``output()``.

        Extra keyword arguments are accepted and ignored.
        """
        self.document = document

        self.preprocess(document)
        document.tree.getroot().html_build(self)
        self.postprocess(document)
        return self.output()

    def output(self):
        """Serialize the tree as pretty-printed UTF-8 HTML in an OutputFile."""
        return OutputFile.from_bytes(
            etree.tostring(
                self.tree,
                method='html',
                encoding='utf-8',
                pretty_print=True
            )
        )

    def postprocess(self, document):
        """Add the translator line and the optional extras to the tree.

        Depending on the ``with_*`` flags this adds anchors, the nota_red
        block (only when it has children), the table of themes and the
        table of contents.  The footnotes block is appended, with a heading
        inserted at its start, whenever ``footnote_counter`` is non-zero.
        """
        _ = document.tree.getroot().master.gettext

        if document.meta.translators:
            self.enter_fragment('header')
            self.start_element('span', {'class': 'translator'})
            self.push_text(_("translated by") + " ")
            self.push_text(
                ", ".join(
                    translator.readable()
                    for translator in document.meta.translators
                )
            )
            # Close the span before leaving the fragment; otherwise
            # exit_fragment() pops the span and the header cursor is left
            # dangling on the stack.
            self.end_element()
            self.exit_fragment()

        if self.with_anchors:
            add_anchors(self.tree)
        if self.with_nota_red and len(self.nota_red):
            self.tree.append(self.nota_red)
        if self.with_themes:
            add_table_of_themes(self.tree)
        if self.with_toc:
            add_table_of_contents(self.tree)

        if self.footnote_counter:
            fnheader = etree.Element("h3")
            fnheader.text = _("Footnotes")
            self.footnotes.insert(0, fnheader)
            self.tree.append(self.footnotes)

    def start_element(self, tag, attrib=None):
        """Append a ``tag`` child to the current cursor and descend into it.

        :param attrib: optional dict of attributes for the new element.
        """
        self.current_cursors.append(etree.SubElement(
            self.cursor,
            tag,
            **(attrib or {})
        ))

    def end_element(self):
        """Return to the parent of the element opened by ``start_element``."""
        self.current_cursors.pop()

    def push_text(self, text):
        """Append ``text`` at the end of the current cursor's content."""
        cursor = self.cursor
        if len(cursor):
            # Text that follows a child element lives in that child's tail.
            cursor[-1].tail = (cursor[-1].tail or '') + text
        else:
            cursor.text = (cursor.text or '') + text
133
134
class StandaloneHtmlBuilder(HtmlBuilder):
    """Wrap the built book text in a complete ``<html>`` document.

    The stylesheet at ``css_url`` is either linked or, when
    ``no_externalities`` is set, downloaded and inlined in a ``<style>``
    element.  The two external scripts are only added in the linked variant.
    """
    css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css"

    def postprocess(self, document):
        """Run the base postprocessing, then add ``<html>``/``<head>``/``<body>``.

        With ``no_externalities`` set this performs a network request for
        ``css_url``; errors raised by ``urlopen`` propagate to the caller.
        """
        super(StandaloneHtmlBuilder, self).postprocess(document)

        # Re-root the tree: the book text becomes the content of <body>.
        tree = etree.Element('html')
        body = etree.SubElement(tree, 'body')
        body.append(self.tree)
        self.tree = tree

        head = etree.Element('head')
        tree.insert(0, head)

        etree.SubElement(head, 'meta', charset='utf-8')
        etree.SubElement(head, 'title').text = document.meta.title

        etree.SubElement(
            head,
            'meta',
            name="viewport",
            content="width=device-width, initial-scale=1, maximum-scale=1"
        )

        if self.no_externalities:
            # Fetch first, and always close the response so the connection
            # is not leaked.  try/finally rather than ``with`` because the
            # Python 2 ``urllib2`` response is not a context manager.
            response = urlopen(self.css_url)
            try:
                css = response.read().decode('utf-8')
            finally:
                response.close()
            etree.SubElement(
                head, 'style',
            ).text = css
        else:
            etree.SubElement(
                head,
                'link',
                href=self.css_url,
                rel="stylesheet",
                type="text/css",
            )

            etree.SubElement(
                body, 'script',
                src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"
            )

            # NOTE(review): this script is referenced over plain http; when
            # the page is served over https, browsers are likely to block it
            # as mixed content -- confirm and consider an https URL.
            etree.SubElement(
                body,
                "script",
                src="http://malsup.github.io/min/jquery.cycle2.min.js"
            )
183
184
class DaisyHtmlBuilder(StandaloneHtmlBuilder):
    """Standalone builder variant that serializes to XHTML.

    Turns off anchors, themes, table of contents, footnotes and nota_red,
    and sets ``no_externalities`` so the stylesheet is inlined instead of
    linked.  ``output`` emits XML with an XHTML 1.0 Transitional doctype.
    """
    file_extension = 'xhtml'

    # Optional extras are all switched off for this format.
    with_anchors = False
    with_themes = False
    with_toc = False
    with_footnotes = False
    with_nota_red = False
    # Not read anywhere in the visible part of this file; presumably
    # consulted by code that receives the builder -- TODO confirm.
    with_deep_identifiers = False
    no_externalities = True

    def output(self):
        """Serialize the tree as XML with an XML declaration and a doctype."""
        xhtml_document = etree.ElementTree(self.tree)

        # The doctype is set through the document's docinfo.
        doctype = xhtml_document.docinfo
        doctype.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN'
        doctype.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'

        serialized = etree.tostring(
            xhtml_document,
            encoding='utf-8',
            pretty_print=True,
            xml_declaration=True
        )
        return OutputFile.from_bytes(serialized)
207