X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/d04e61819290fc8d6d71b1932c55a774014c1f05..82c0860d1520489be56457829d49eb17f165b9cd:/src/librarian/builders/html.py?ds=sidebyside diff --git a/src/librarian/builders/html.py b/src/librarian/builders/html.py index 40d7777..66db675 100644 --- a/src/librarian/builders/html.py +++ b/src/librarian/builders/html.py @@ -1,23 +1,75 @@ -# coding: utf-8 -from __future__ import unicode_literals - +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. +# +from collections import defaultdict +import os +from urllib.request import urlopen from lxml import etree -from librarian.html import add_anchors, add_table_of_contents, add_table_of_themes +from librarian.html import add_table_of_contents, add_table_of_themes, add_image_sizes from librarian import OutputFile -class HtmlBuilder: - file_extension = "html" - identifier = "html" +class TreeBuilder: + @property + def cursor(self): + return self.current_cursors[-1] + + def enter_fragment(self, fragment): + cursor = self.cursors.get(fragment, self.cursor) + self.current_cursors.append(cursor) + + def exit_fragment(self): + self.current_cursors.pop() + + def create_fragment(self, name, element): + assert name not in self.cursors + self.cursors[name] = element + + def forget_fragment(self, name): + del self.cursors[name] + + def start_element(self, tag, attrib=None): + self.current_cursors.append(etree.SubElement( + self.cursor, + tag, + **(attrib or {}) + )) + + def end_element(self): + self.current_cursors.pop() + + def push_text(self, text): + cursor = self.cursor + if len(cursor): + cursor[-1].tail = (cursor[-1].tail or '') + text + else: + cursor.text = (cursor.text or '') + text - def __init__(self, image_location='https://wolnelektury.pl/media/book/pictures/marcos-historia-kolorow/'): - self.image_location = image_location - self.tree = text = etree.Element('div', **{'id': 'book-text'}) - self.header = etree.SubElement(text, 'h1') +class HtmlBuilder(TreeBuilder): + build_method_fn = 'html_build' + file_extension = "html" + with_themes = True + with_toc = True + with_footnotes = True + with_nota_red = True + with_ids = True + with_numbering = True + no_externalities = False + orphans = True + + root_tag = 'div' + root_attrib = {'id': 'book-text'} + + def __init__(self, gallery_path=None, gallery_url=None, base_url=None): + self._base_url = base_url + self.gallery_path = gallery_path + self.gallery_url = gallery_url + + self.tree = text = etree.Element(self.root_tag, **self.root_attrib) + self.header = etree.Element('h1') self.footnotes = etree.Element('div', id='footnotes') - self.footnote_counter = 0 self.nota_red = etree.Element('div', id='nota_red') @@ -27,38 +79,40 @@ class HtmlBuilder: 'footnotes': self.footnotes, 'nota_red': self.nota_red, } - self.current_cursors = [None] + self.current_cursors = [text] @property - def cursor(self): - return self.cursors[self.current_cursors[-1]] - - @cursor.setter - def cursor(self, value): - self.cursors[self.current_cursors[-1]] = value - - def enter_fragment(self, fragment): - self.current_cursors.append(fragment) - - def exit_fragment(self): - self.current_cursors.pop() - - def create_fragment(self, name, element): - assert name not in self.cursors - self.cursors[name] = element + def base_url(self): + if self._base_url is not None: + return self._base_url + else: + return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug) - def forget_fragment(self, name): - del self.cursors[name] + def build(self, document, element=None, **kwargs): + self.document = document + self.document.assign_ids() + self.prepare_images() - def preprocess(self, document): - document._compat_assign_ordered_ids() - document._compat_assign_section_ids() + if element is None: + element = document.tree.getroot() - def build(self, document): - self.preprocess(document) - document.tree.getroot().html_build(self) + element.html_build(self) self.postprocess(document) - + return self.output() + + def prepare_images(self): + # Temporarily use the legacy method, before transitioning to external generators. + if self.gallery_path is None: + return + try: + os.makedirs(self.gallery_path) + except OSError: + pass + add_image_sizes(self.document.tree, self.gallery_path, self.gallery_url, self.base_url) + + def output(self): + if not len(self.tree): + return None return OutputFile.from_bytes( etree.tostring( self.tree, @@ -69,7 +123,7 @@ class HtmlBuilder: ) def postprocess(self, document): - _ = document.tree.getroot().master.gettext + _ = document.tree.getroot().gettext if document.meta.translators: self.enter_fragment('header') @@ -83,40 +137,35 @@ class HtmlBuilder: ) self.exit_fragment() - add_anchors(self.tree) - if len(self.nota_red): + if len(self.header): + self.tree.insert(0, self.header) + + if self.with_nota_red and len(self.nota_red): self.tree.append(self.nota_red) - add_table_of_themes(self.tree) - add_table_of_contents(self.tree) + if self.with_themes: + add_table_of_themes(self.tree) + if self.with_toc: + add_table_of_contents(self.tree) - if self.footnote_counter: + if len(self.footnotes): fnheader = etree.Element("h3") fnheader.text = _("Footnotes") self.footnotes.insert(0, fnheader) self.tree.append(self.footnotes) - def start_element(self, tag, attrib=None): - self.cursor = etree.SubElement( - self.cursor, - tag, - **(attrib or {}) - ) - - def end_element(self): - self.cursor = self.cursor.getparent() - - def push_text(self, text): - if text == 'Między nami nic nie było': - print(self.cursors) - print(self.current_cursors) - cursor = self.cursor - if len(cursor): - cursor[-1].tail = (cursor[-1].tail or '') + text - else: - cursor.text = (cursor.text or '') + text + def add_visible_number(self, element): + assert '_id' in element.attrib, etree.tostring(element) + self.start_element('a', { + 'href': f'#{element.attrib["_id"]}', + 'class': 'wl-num', + }) + self.push_text(element.attrib['_visible_numbering']) + self.end_element() class StandaloneHtmlBuilder(HtmlBuilder): + css_url = "https://static.wolnelektury.pl/css/compressed/book_text.css" + def postprocess(self, document): super(StandaloneHtmlBuilder, self).postprocess(document) @@ -128,7 +177,6 @@ class StandaloneHtmlBuilder(HtmlBuilder): head = etree.Element('head') tree.insert(0, head) - etree.SubElement(head, 'meta', charset='utf-8') etree.SubElement(head, 'title').text = document.meta.title @@ -139,21 +187,80 @@ class StandaloneHtmlBuilder(HtmlBuilder): content="width=device-width, initial-scale=1, maximum-scale=1" ) - etree.SubElement( - head, - 'link', - href="https://static.wolnelektury.pl/css/compressed/book_text.css", - rel="stylesheet", - type="text/css", - ) + if self.no_externalities: + etree.SubElement( + head, 'style', + ).text = urlopen(self.css_url).read().decode('utf-8') + else: + etree.SubElement( + head, + 'link', + href=self.css_url, + rel="stylesheet", + type="text/css", + ) - etree.SubElement( - body, 'script', - src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js" - ) + etree.SubElement( + body, 'script', + src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js" + ) - etree.SubElement( - body, - "script", - src="http://malsup.github.io/min/jquery.cycle2.min.js" + etree.SubElement( + body, + "script", + src="http://malsup.github.io/min/jquery.cycle2.min.js" + ) + + +class SnippetHtmlBuilder(HtmlBuilder): + with_themes = False + with_toc = False + with_footnotes = False + with_nota_red = False + with_ids = False + with_numbering = False + + +class AbstraktHtmlBuilder(HtmlBuilder): + with_themes = False + with_toc = False + with_footnotes = False + with_nota_red = False + with_ids = False + with_numbering = False + + root_tag = 'blockquote' + root_attrib = {} + + def build(self, document, element=None, **kwargs): + if element is None: + element = document.tree.find('//abstrakt') + if element is None: + return OutputFile.from_bytes(b'') + element.attrib['_force'] = '1' + return super().build(document, element, **kwargs) + + +class DaisyHtmlBuilder(StandaloneHtmlBuilder): + file_extension = 'xhtml' + with_themes = False + with_toc = False + with_footnotes = False + with_nota_red = False + with_deep_identifiers = False + no_externalities = True + with_numbering = False + + def output(self): + tree = etree.ElementTree(self.tree) + tree.docinfo.public_id = '-//W3C//DTD XHTML 1.0 Transitional//EN' + tree.docinfo.system_url = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' + return OutputFile.from_bytes( + etree.tostring( + tree, + encoding='utf-8', + pretty_print=True, + xml_declaration=True + ) ) +