From d04e61819290fc8d6d71b1932c55a774014c1f05 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Wed, 7 Oct 2020 12:13:52 +0200 Subject: [PATCH] Experimental class-based conversion now working for TXT and HTML. --- MANIFEST.in | 1 + Makefile | 5 + setup.py | 3 +- src/librarian/builders/__init__.py | 18 +-- src/librarian/builders/html.py | 143 ++++++++++++++---- src/librarian/builders/txt.py | 10 +- src/librarian/command_line.py | 3 +- src/librarian/document.py | 48 +++++- src/librarian/elements/__init__.py | 57 ++++--- src/librarian/elements/base.py | 34 ++--- src/librarian/elements/blocks/__init__.py | 3 + src/librarian/elements/blocks/dedykacja.py | 8 + src/librarian/elements/blocks/dlugi_cytat.py | 2 + src/librarian/elements/blocks/nota.py | 3 + src/librarian/elements/blocks/poezja_cyt.py | 2 + src/librarian/elements/blocks/ramka.py | 7 + src/librarian/elements/comments/__init__.py | 1 + src/librarian/elements/comments/nota_red.py | 11 ++ src/librarian/elements/comments/uwaga.py | 2 + src/librarian/elements/drama/__init__.py | 1 + src/librarian/elements/drama/didask_tekst.py | 3 + src/librarian/elements/drama/didaskalia.py | 2 + src/librarian/elements/drama/kwestia.py | 2 + src/librarian/elements/drama/lista_osob.py | 10 ++ src/librarian/elements/drama/lista_osoba.py | 7 + src/librarian/elements/drama/miejsce_czas.py | 5 + .../elements/drama/naglowek_listy.py | 2 +- .../elements/drama/naglowek_osoba.py | 1 + src/librarian/elements/drama/osoba.py | 3 +- src/librarian/elements/figures/__init__.py | 4 + src/librarian/elements/figures/animacja.py | 12 ++ src/librarian/elements/figures/ilustr.py | 4 +- src/librarian/elements/figures/kol.py | 5 + src/librarian/elements/figures/tabela.py | 12 ++ src/librarian/elements/figures/wiersz.py | 5 + src/librarian/elements/footnotes/__init__.py | 60 ++++++++ .../elements/front/dzielo_nadrzedne.py | 2 + src/librarian/elements/front/motto.py | 3 + src/librarian/elements/front/motto_podpis.py | 4 +- src/librarian/elements/front/nazwa_utworu.py | 1 - src/librarian/elements/headers/__init__.py | 3 + .../elements/headers/naglowek_czesc.py | 2 + .../elements/headers/naglowek_podrozdzial.py | 2 + .../elements/headers/podtytul_czesc.py | 9 ++ .../elements/headers/podtytul_podrozdzial.py | 9 ++ .../elements/headers/podtytul_rozdzial.py | 9 ++ src/librarian/elements/paragraphs/akap.py | 2 - src/librarian/elements/poetry/__init__.py | 2 + src/librarian/elements/poetry/strofa.py | 4 + src/librarian/elements/poetry/wers.py | 8 +- src/librarian/elements/poetry/wers_akap.py | 9 ++ src/librarian/elements/poetry/wers_cd.py | 4 + .../elements/poetry/wers_do_prawej.py | 9 ++ src/librarian/elements/poetry/wers_wciety.py | 10 +- src/librarian/elements/root/__init__.py | 6 + .../elements/separators/sekcja_asterysk.py | 7 + .../elements/separators/sekcja_swiatlo.py | 2 + .../elements/separators/separator_linia.py | 5 + src/librarian/elements/styles/__init__.py | 4 + src/librarian/elements/styles/indeks_dolny.py | 7 + src/librarian/elements/styles/mat.py | 10 ++ src/librarian/elements/styles/slowo_obce.py | 3 +- src/librarian/elements/styles/tytul_dziela.py | 3 + .../elements/styles/wieksze_odstepy.py | 9 ++ src/librarian/elements/styles/www.py | 5 + src/librarian/elements/styles/wyroznienie.py | 2 + src/librarian/elements/themes/end.py | 9 +- src/librarian/elements/themes/motyw.py | 16 +- src/librarian/html.py | 7 +- .../locale/pl/LC_MESSAGES/messages.mo | Bin 0 -> 659 bytes .../locale/pl/LC_MESSAGES/messages.po | 42 +++++ src/librarian/util.py | 11 ++ .../text/asnyk_miedzy_nami_expected.html | 24 +-- .../asnyk_miedzy_nami_expected.legacy.html | 41 +++++ .../text/asnyk_miedzy_nami_fragments.html | 2 + tests/test_html.py | 23 ++- 76 files changed, 694 insertions(+), 135 deletions(-) create mode 100644 Makefile create mode 100644 src/librarian/elements/blocks/dedykacja.py create mode 100644 src/librarian/elements/blocks/ramka.py create mode 100644 src/librarian/elements/comments/nota_red.py create mode 100644 src/librarian/elements/drama/miejsce_czas.py create mode 100644 src/librarian/elements/figures/animacja.py create mode 100644 src/librarian/elements/figures/kol.py create mode 100644 src/librarian/elements/figures/tabela.py create mode 100644 src/librarian/elements/figures/wiersz.py create mode 100644 src/librarian/elements/headers/podtytul_czesc.py create mode 100644 src/librarian/elements/headers/podtytul_podrozdzial.py create mode 100644 src/librarian/elements/headers/podtytul_rozdzial.py create mode 100644 src/librarian/elements/poetry/wers_akap.py create mode 100644 src/librarian/elements/poetry/wers_do_prawej.py create mode 100644 src/librarian/elements/styles/indeks_dolny.py create mode 100644 src/librarian/elements/styles/mat.py create mode 100644 src/librarian/elements/styles/wieksze_odstepy.py create mode 100644 src/librarian/elements/styles/www.py create mode 100644 src/librarian/locale/pl/LC_MESSAGES/messages.mo create mode 100644 src/librarian/locale/pl/LC_MESSAGES/messages.po create mode 100644 tests/files/text/asnyk_miedzy_nami_expected.legacy.html diff --git a/MANIFEST.in b/MANIFEST.in index a262372..ec780e3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,6 +10,7 @@ include src/librarian/epub/* include src/librarian/pdf/* include src/librarian/fb2/* include src/librarian/fonts/* +recursive-include src/librarian/locale *.po *.mo graft src/librarian/res graft src/librarian/font-optimizer diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e206641 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +locale: + find src/librarian -name '*.py' |xargs xgettext --from-code utf-8 -o - | sed '/^"POT-Creation-Date:/d' > messages.pot + for lang in pl; do msgmerge -U src/librarian/locale/$${lang}/LC_MESSAGES/messages.po messages.pot; done + rm messages.pot + diff --git a/setup.py b/setup.py index 1ddf324..d497bf1 100755 --- a/setup.py +++ b/setup.py @@ -32,7 +32,8 @@ setup( package_dir={"": "src"}, package_data={'librarian': ['xslt/*.xslt', 'xslt/*.xml', 'epub/*', 'pdf/*', 'fb2/*', 'fonts/*'] + whole_tree(os.path.join(os.path.dirname(__file__), 'src/librarian'), 'res') + - whole_tree(os.path.join(os.path.dirname(__file__), 'src/librarian'), 'font-optimizer')}, + whole_tree(os.path.join(os.path.dirname(__file__), 'src/librarian'), 'font-optimizer') + + whole_tree(os.path.join(os.path.dirname(__file__), 'src/librarian'), 'locale')}, include_package_data=True, install_requires=[ 'lxml>=2.2,<=4.3', diff --git a/src/librarian/builders/__init__.py b/src/librarian/builders/__init__.py index fcd9194..e9afc56 100644 --- a/src/librarian/builders/__init__.py +++ b/src/librarian/builders/__init__.py @@ -1,14 +1,12 @@ +from collections import OrderedDict from .txt import TxtBuilder -from .html import HtmlBuilder +from .html import HtmlBuilder, StandaloneHtmlBuilder from .sanitize import Sanitizer -builders = [ - TxtBuilder, - HtmlBuilder, - Sanitizer, -] - - -def get_builder_class(builder_id): - return next(b for b in builders if b.identifier == builder_id) +builders = OrderedDict([ + ("txt", TxtBuilder), + ("html", HtmlBuilder), + ("html-standalone", StandaloneHtmlBuilder), + ("sanitizer", Sanitizer), +]) diff --git a/src/librarian/builders/html.py b/src/librarian/builders/html.py index 8015c6a..40d7777 100644 --- a/src/librarian/builders/html.py +++ b/src/librarian/builders/html.py @@ -1,4 +1,8 @@ +# coding: utf-8 +from __future__ import unicode_literals + from lxml import etree +from librarian.html import add_anchors, add_table_of_contents, add_table_of_themes from librarian import OutputFile @@ -9,41 +13,52 @@ class HtmlBuilder: def __init__(self, image_location='https://wolnelektury.pl/media/book/pictures/marcos-historia-kolorow/'): self.image_location = image_location - #self.tree = etree.Element('html') - #body = etree.SubElement(self.tree, 'body') - #text = etree.SubElement(body, 'div', **{'id': 'book-text'}) self.tree = text = etree.Element('div', **{'id': 'book-text'}) - toc = etree.SubElement(text, 'div', id='toc') - themes = etree.SubElement(text, 'div', id='themes') - h1 = etree.SubElement(text, 'h1') + self.header = etree.SubElement(text, 'h1') + + self.footnotes = etree.Element('div', id='footnotes') + self.footnote_counter = 0 + + self.nota_red = etree.Element('div', id='nota_red') self.cursors = { None: text, - 'toc': toc, - 'themes': themes, - 'header': h1, + 'header': self.header, + 'footnotes': self.footnotes, + 'nota_red': self.nota_red, } self.current_cursors = [None] + @property + def cursor(self): + return self.cursors[self.current_cursors[-1]] + + @cursor.setter + def cursor(self, value): + self.cursors[self.current_cursors[-1]] = value + def enter_fragment(self, fragment): self.current_cursors.append(fragment) def exit_fragment(self): self.current_cursors.pop() - + + def create_fragment(self, name, element): + assert name not in self.cursors + self.cursors[name] = element + + def forget_fragment(self, name): + del self.cursors[name] + + def preprocess(self, document): + document._compat_assign_ordered_ids() + document._compat_assign_section_ids() + def build(self, document): + self.preprocess(document) document.tree.getroot().html_build(self) + self.postprocess(document) - head = etree.Element('head') - self.tree.insert(0, head) - etree.SubElement( - head, - 'link', - href="https://static.wolnelektury.pl/css/compressed/book_text.b15153e56c0a.css", - rel="stylesheet", - type="text/css", - ) - return OutputFile.from_bytes( etree.tostring( self.tree, @@ -53,20 +68,92 @@ class HtmlBuilder: ) ) - def start_element(self, tag, attrib): - self.cursors[self.current_cursors[-1]] = etree.SubElement( - self.cursors[self.current_cursors[-1]], + def postprocess(self, document): + _ = document.tree.getroot().master.gettext + + if document.meta.translators: + self.enter_fragment('header') + self.start_element('span', {'class': 'translator'}) + self.push_text(_("translated by") + " ") + self.push_text( + ", ".join( + translator.readable() + for translator in document.meta.translators + ) + ) + self.exit_fragment() + + add_anchors(self.tree) + if len(self.nota_red): + self.tree.append(self.nota_red) + add_table_of_themes(self.tree) + add_table_of_contents(self.tree) + + if self.footnote_counter: + fnheader = etree.Element("h3") + fnheader.text = _("Footnotes") + self.footnotes.insert(0, fnheader) + self.tree.append(self.footnotes) + + def start_element(self, tag, attrib=None): + self.cursor = etree.SubElement( + self.cursor, tag, - **attrib + **(attrib or {}) ) - print(self.cursors) def end_element(self): - self.cursors[self.current_cursors[-1]] = self.cursors[self.current_cursors[-1]].getparent() + self.cursor = self.cursor.getparent() def push_text(self, text): - cursor = self.cursors[self.current_cursors[-1]] + if text == 'Między nami nic nie było': + print(self.cursors) + print(self.current_cursors) + cursor = self.cursor if len(cursor): - cursor.tail = (cursor[-1].tail or '') + text + cursor[-1].tail = (cursor[-1].tail or '') + text else: cursor.text = (cursor.text or '') + text + + +class StandaloneHtmlBuilder(HtmlBuilder): + def postprocess(self, document): + super(StandaloneHtmlBuilder, self).postprocess(document) + + tree = etree.Element('html') + body = etree.SubElement(tree, 'body') + body.append(self.tree) + self.tree = tree + + head = etree.Element('head') + tree.insert(0, head) + + + etree.SubElement(head, 'meta', charset='utf-8') + etree.SubElement(head, 'title').text = document.meta.title + + etree.SubElement( + head, + 'meta', + name="viewport", + content="width=device-width, initial-scale=1, maximum-scale=1" + ) + + etree.SubElement( + head, + 'link', + href="https://static.wolnelektury.pl/css/compressed/book_text.css", + rel="stylesheet", + type="text/css", + ) + + etree.SubElement( + body, 'script', + src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js" + ) + + etree.SubElement( + body, + "script", + src="http://malsup.github.io/min/jquery.cycle2.min.js" + ) diff --git a/src/librarian/builders/txt.py b/src/librarian/builders/txt.py index 4023814..2830e89 100644 --- a/src/librarian/builders/txt.py +++ b/src/librarian/builders/txt.py @@ -94,9 +94,13 @@ class TxtBuilder: self.enter_fragment('header') if meta.translators: - self.push_text("tłum. ", 'header') - for translator in meta.translators: - self.push_text(translator.readable()) + self.push_text("tłum. ") + self.push_text( + ", ".join( + translator.readable() + for translator in meta.translators + ) + ) #builder.push_margin(2) self.push_legacy_margin(1) diff --git a/src/librarian/command_line.py b/src/librarian/command_line.py index 91196f1..3c17daf 100644 --- a/src/librarian/command_line.py +++ b/src/librarian/command_line.py @@ -9,7 +9,7 @@ def main(*args, **kwargs): parser.add_argument( 'builder', - choices=[b.identifier for b in builders], + choices=builders.keys(), help="Builder" ) parser.add_argument('input_file') @@ -23,6 +23,7 @@ def main(*args, **kwargs): ) args = parser.parse_args() + builder = builders[args.builder] if args.output_file: output_file_path = args.output_file diff --git a/src/librarian/document.py b/src/librarian/document.py index 1bd249d..c0efd3a 100644 --- a/src/librarian/document.py +++ b/src/librarian/document.py @@ -1,7 +1,11 @@ +import gettext +import os +import re from lxml import etree -from .builders import get_builder_class +from .builders import builders from .parser import parser from . import dcparser +from .functions import lang_code_3to2 class WLDocument: @@ -20,5 +24,43 @@ class WLDocument: return master.meta def build(self, builder_id, **kwargs): - return get_builder_class(builder_id)().build(self, **kwargs) - + return builders[builder_id]().build(self, **kwargs) + + def _compat_assign_ordered_ids(self): + """ + Compatibility: ids in document order, to be roughly compatible with legacy + footnote ids. Just for testing consistency, change to some sane identifiers + at convenience. + """ + EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE) + def _compat_assign_ordered_ids_in_elem(elem, i): + elem.attrib['_compat_ordered_id'] = str(i) + i += 1 + if getattr(elem, 'HTML_CLASS', None) == 'stanza': + if elem.text: + i += len(EXPR.split(elem.text)) - 1 + for sub in elem: + i = _compat_assign_ordered_ids_in_elem(sub, i) + if sub.tail: + i += len(EXPR.split(sub.tail)) - 1 + else: + if elem.tag in ('uwaga', 'extra'): + return i + for sub in elem: + i = _compat_assign_ordered_ids_in_elem(sub, i) + return i + + _compat_assign_ordered_ids_in_elem(self.tree.getroot(), 4) + + def _compat_assign_section_ids(self): + """ + Ids in master-section order. These need to be compatible with the + #secN anchors used by WL search results page to link to fragments. + """ + def _compat_assigns_section_ids_in_elem(elem, prefix='sec'): + for i, child in enumerate(elem): + idfier = '{}{}'.format(prefix, i + 1) + child.attrib['_compat_section_id'] = idfier + _compat_assigns_section_ids_in_elem(child, idfier + '-') + _compat_assigns_section_ids_in_elem(self.tree.getroot().master) + diff --git a/src/librarian/elements/__init__.py b/src/librarian/elements/__init__.py index 512b4ef..f3a8521 100644 --- a/src/librarian/elements/__init__.py +++ b/src/librarian/elements/__init__.py @@ -33,6 +33,7 @@ WL_ELEMENTS = { "dlugi_cytat": blocks.DlugiCytat, "poezja_cyt": blocks.PoezjaCyt, "dlugi_cyt": blocks.DlugiCytat, ### ??? + "ramka": blocks.Ramka, "slowo_obce": styles.SlowoObce, "tytul_dziela": styles.TytulDziela, @@ -52,22 +53,22 @@ WL_ELEMENTS = { "kwestia": drama.Kwestia, "didask_tekst": drama.DidaskTekst, - "dedykacja": paragraphs.Akap, - "miejsce_czas": paragraphs.Akap, + "dedykacja": blocks.Dedykacja, + "miejsce_czas": drama.MiejsceCzas, "uwaga": comments.Uwaga, "wers": poetry.Wers, "wers_wciety": poetry.WersWciety, "wers_cd": poetry.WersCd, - "wers_akap": poetry.Wers, + "wers_akap": poetry.WersAkap, "zastepnik_wersu": poetry.ZastepnikWersu, - "wers_do_prawej": poetry.Wers, + "wers_do_prawej": poetry.WersDoPrawej, - "pa": footnotes.Footnote, - "pe": footnotes.Footnote, - "pr": footnotes.Footnote, - "pt": footnotes.Footnote, + "pa": footnotes.PA, + "pe": footnotes.PE, + "pr": footnotes.PR, + "pt": footnotes.PT, "begin": themes.Begin, "end": themes.End, @@ -75,7 +76,7 @@ WL_ELEMENTS = { "nota": blocks.Nota, - "nota_red": comments.Abstrakt, + "nota_red": comments.NotaRed, "extra": comments.Abstrakt, "abstrakt": comments.Abstrakt, @@ -85,6 +86,11 @@ WL_ELEMENTS = { "naglowek_rozdzial": headers.NaglowekRozdzial, "naglowek_podrozdzial": headers.NaglowekPodrozdzial, "srodtytul": headers.NaglowekCzesc, + "podtytul_czesc": headers.PodtytulCzesc, + "podtytul_akt": headers.PodtytulCzesc, + "podtytul_scena": headers.PodtytulRozdzial, + "podtytul_rozdzial": headers.PodtytulRozdzial, + "podtytul_podrozdzial": headers.PodtytulPodrozdzial, "naglowek_listy": drama.NaglowekListy, @@ -92,24 +98,25 @@ WL_ELEMENTS = { "sekcja_swiatlo": separators.SekcjaSwiatlo, "separator_linia": separators.SeparatorLinia, - "wieksze_odstepy": styles.Wyroznienie, - "mat": styles.Wyroznienie, - "www": styles.Wyroznienie, - "indeks_dolny": styles.Wyroznienie, + "wieksze_odstepy": styles.WiekszeOdstepy, + "mat": styles.Mat, + "www": styles.WWW, + "indeks_dolny": styles.IndeksDolny, - "tabela": paragraphs.Akap, - "tabelka": paragraphs.Akap, - "wiersz": paragraphs.Akap, - "kol": paragraphs.Akap, + "tabela": figures.Tabela, + "tabelka": figures.Tabela, + "wiersz": figures.Wiersz, + "kol": figures.Kol, + "animacja": figures.Animacja, "ilustr": figures.Ilustr, -# sklodowska-badanie-cial-radioaktywnych.xml - "mrow": paragraphs.Akap, - "mi": paragraphs.Akap, - "mo": paragraphs.Akap, - "msup": paragraphs.Akap, - "mn": paragraphs.Akap, - "mfrac": paragraphs.Akap, - "mfenced": paragraphs.Akap, + # Inline MathML, should really be namespaced. + "mrow": etree.ElementBase, + "mi": etree.ElementBase, + "mo": etree.ElementBase, + "msup": etree.ElementBase, + "mn": etree.ElementBase, + "mfrac": etree.ElementBase, + "mfenced": etree.ElementBase, } diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py index fb5e3b1..b9df185 100644 --- a/src/librarian/elements/base.py +++ b/src/librarian/elements/base.py @@ -3,6 +3,7 @@ import re from lxml import etree from librarian import dcparser, RDFNS +from librarian.util import get_translation class WLElement(etree.ElementBase): @@ -14,7 +15,6 @@ class WLElement(etree.ElementBase): HTML_TAG = None HTML_ATTR = {} HTML_CLASS = None - HTML_SECTION = False CAN_HAVE_TEXT = True STRIP = False @@ -37,7 +37,7 @@ class WLElement(etree.ElementBase): else: self._meta_object = None return self._meta_object - + @property def meta(self): if self.meta_object is not None: @@ -47,7 +47,11 @@ class WLElement(etree.ElementBase): return self.getparent().meta else: return self.document.base_meta - + + @property + def gettext(self): + return get_translation(self.meta.language).gettext + def normalize_text(self, text): text = text or '' for e, s in self.text_substitutions: @@ -99,35 +103,17 @@ class WLElement(etree.ElementBase): # always copy the id attribute (?) if self.attrib.get('id'): attr['id'] = self.attrib['id'] + elif '_compat_section_id' in self.attrib: + attr['id'] = self.attrib['_compat_section_id'] return attr - - def html_build(self, builder): - if self.HTML_SECTION: - builder.start_element( - 'a', {"name": "f18", "class": "target"} - ) - builder.push_text(" ") - builder.end_element() - - builder.start_element( - "a", {"href": "#f18", "class": "anchor"} - ) - builder.push_text("18") - builder.end_element() - + def html_build(self, builder): if self.HTML_TAG: builder.start_element( self.HTML_TAG, self.get_html_attr(builder), ) - if self.HTML_SECTION: - builder.start_element( - "a", {"name": "sec34"} - ) - builder.end_element() - self._html_build_inner(builder) if self.HTML_TAG: builder.end_element() diff --git a/src/librarian/elements/blocks/__init__.py b/src/librarian/elements/blocks/__init__.py index 075493a..38dd488 100644 --- a/src/librarian/elements/blocks/__init__.py +++ b/src/librarian/elements/blocks/__init__.py @@ -1,3 +1,6 @@ +from .dedykacja import Dedykacja from .dlugi_cytat import DlugiCytat from .nota import Nota from .poezja_cyt import PoezjaCyt +from .ramka import Ramka + diff --git a/src/librarian/elements/blocks/dedykacja.py b/src/librarian/elements/blocks/dedykacja.py new file mode 100644 index 0000000..5436271 --- /dev/null +++ b/src/librarian/elements/blocks/dedykacja.py @@ -0,0 +1,8 @@ +from ..base import WLElement + + +class Dedykacja(WLElement): + TXT_LEGACY_TOP_MARGIN = 2 + + HTML_TAG = "div" + HTML_CLASS = "dedication" diff --git a/src/librarian/elements/blocks/dlugi_cytat.py b/src/librarian/elements/blocks/dlugi_cytat.py index 8137867..c660583 100644 --- a/src/librarian/elements/blocks/dlugi_cytat.py +++ b/src/librarian/elements/blocks/dlugi_cytat.py @@ -8,3 +8,5 @@ class DlugiCytat(WLElement): TXT_BOTTOM_MARGIN = 2 TXT_LEGACY_TOP_MARGIN = 1 TXT_LEGACY_BOTTOM_MARGIN = 0 + + HTML_TAG = 'blockquote' diff --git a/src/librarian/elements/blocks/nota.py b/src/librarian/elements/blocks/nota.py index c38021d..a01bf29 100644 --- a/src/librarian/elements/blocks/nota.py +++ b/src/librarian/elements/blocks/nota.py @@ -3,3 +3,6 @@ from ..base import WLElement class Nota(WLElement): CAN_HAVE_TEXT = False + + HTML_TAG = "div" + HTML_CLASS = "note" diff --git a/src/librarian/elements/blocks/poezja_cyt.py b/src/librarian/elements/blocks/poezja_cyt.py index 3349567..0c103b1 100644 --- a/src/librarian/elements/blocks/poezja_cyt.py +++ b/src/librarian/elements/blocks/poezja_cyt.py @@ -8,3 +8,5 @@ class PoezjaCyt(WLElement): TXT_BOTTOM_MARGIN = 3 TXT_LEGACY_TOP_MARGIN = 1 TXT_LEGACY_BOTTOM_MARGIN = 0 + + HTML_TAG = 'blockquote' diff --git a/src/librarian/elements/blocks/ramka.py b/src/librarian/elements/blocks/ramka.py new file mode 100644 index 0000000..d8dd5f0 --- /dev/null +++ b/src/librarian/elements/blocks/ramka.py @@ -0,0 +1,7 @@ +from ..base import WLElement + + +class Ramka(WLElement): + HTML_TAG = "div" + HTML_CLASS = "ramka" + diff --git a/src/librarian/elements/comments/__init__.py b/src/librarian/elements/comments/__init__.py index 9073a91..86ecb17 100644 --- a/src/librarian/elements/comments/__init__.py +++ b/src/librarian/elements/comments/__init__.py @@ -1,2 +1,3 @@ from .abstrakt import Abstrakt +from .nota_red import NotaRed from .uwaga import Uwaga diff --git a/src/librarian/elements/comments/nota_red.py b/src/librarian/elements/comments/nota_red.py new file mode 100644 index 0000000..faa5dd1 --- /dev/null +++ b/src/librarian/elements/comments/nota_red.py @@ -0,0 +1,11 @@ +from ..base import WLElement + + +class NotaRed(WLElement): + def txt_build(self, builder): + pass + + def html_build(self, builder): + builder.enter_fragment('nota_red') + super(NotaRed, self).html_build(builder) + builder.exit_fragment() diff --git a/src/librarian/elements/comments/uwaga.py b/src/librarian/elements/comments/uwaga.py index 5a5e26c..adf908b 100644 --- a/src/librarian/elements/comments/uwaga.py +++ b/src/librarian/elements/comments/uwaga.py @@ -5,3 +5,5 @@ class Uwaga(WLElement): def txt_build(self, builder): pass + def html_build(self, builder): + pass diff --git a/src/librarian/elements/drama/__init__.py b/src/librarian/elements/drama/__init__.py index 1c88a6a..c1cc601 100644 --- a/src/librarian/elements/drama/__init__.py +++ b/src/librarian/elements/drama/__init__.py @@ -3,6 +3,7 @@ from .didask_tekst import DidaskTekst from .kwestia import Kwestia from .lista_osoba import ListaOsoba from .lista_osob import ListaOsob +from .miejsce_czas import MiejsceCzas from .naglowek_listy import NaglowekListy from .naglowek_osoba import NaglowekOsoba from .osoba import Osoba diff --git a/src/librarian/elements/drama/didask_tekst.py b/src/librarian/elements/drama/didask_tekst.py index 7b6ae54..7227c17 100644 --- a/src/librarian/elements/drama/didask_tekst.py +++ b/src/librarian/elements/drama/didask_tekst.py @@ -4,3 +4,6 @@ from ..base import WLElement class DidaskTekst(WLElement): TXT_PREFIX = "/ " TXT_SUFFIX = " /" + + HTML_TAG = "em" + HTML_CLASS = "didaskalia" diff --git a/src/librarian/elements/drama/didaskalia.py b/src/librarian/elements/drama/didaskalia.py index cdd7900..af0520f 100644 --- a/src/librarian/elements/drama/didaskalia.py +++ b/src/librarian/elements/drama/didaskalia.py @@ -9,3 +9,5 @@ class Didaskalia(WLElement): TXT_PREFIX = "/ " TXT_SUFFIX = " /" + HTML_TAG = "div" + HTML_CLASS = "didaskalia" diff --git a/src/librarian/elements/drama/kwestia.py b/src/librarian/elements/drama/kwestia.py index 0bb5f3d..27dca30 100644 --- a/src/librarian/elements/drama/kwestia.py +++ b/src/librarian/elements/drama/kwestia.py @@ -4,3 +4,5 @@ from ..base import WLElement class Kwestia(WLElement): CAN_HAVE_TEXT = False + HTML_TAG = "div" + HTML_CLASS = "kwestia" diff --git a/src/librarian/elements/drama/lista_osob.py b/src/librarian/elements/drama/lista_osob.py index ec18472..5beca64 100644 --- a/src/librarian/elements/drama/lista_osob.py +++ b/src/librarian/elements/drama/lista_osob.py @@ -1,3 +1,4 @@ +from lxml import etree from ..base import WLElement @@ -9,3 +10,12 @@ class ListaOsob(WLElement): TXT_LEGACY_TOP_MARGIN = 3 TXT_LEGACY_BOTTOM_MARGIN = 1 + HTML_TAG = "div" + HTML_CLASS = "person-list" + + def _html_build_inner(self, builder): + ol = etree.Element('ol') + builder.create_fragment('list', ol) + super(ListaOsob, self)._html_build_inner(builder) + builder.cursor.append(ol) + builder.forget_fragment('list') diff --git a/src/librarian/elements/drama/lista_osoba.py b/src/librarian/elements/drama/lista_osoba.py index 5759c50..fe55838 100644 --- a/src/librarian/elements/drama/lista_osoba.py +++ b/src/librarian/elements/drama/lista_osoba.py @@ -8,3 +8,10 @@ class ListaOsoba(WLElement): TXT_LEGACY_BOTTOM_MARGIN = 0 TXT_PREFIX = " * " + HTML_TAG = "li" + + def html_build(self, builder): + builder.enter_fragment('list') + super(ListaOsoba, self).html_build(builder) + builder.exit_fragment() + diff --git a/src/librarian/elements/drama/miejsce_czas.py b/src/librarian/elements/drama/miejsce_czas.py new file mode 100644 index 0000000..a4e9453 --- /dev/null +++ b/src/librarian/elements/drama/miejsce_czas.py @@ -0,0 +1,5 @@ +from ..paragraphs import Akap + + +class MiejsceCzas(Akap): + HTML_CLASS = 'place-and-time' diff --git a/src/librarian/elements/drama/naglowek_listy.py b/src/librarian/elements/drama/naglowek_listy.py index 398a055..4db9111 100644 --- a/src/librarian/elements/drama/naglowek_listy.py +++ b/src/librarian/elements/drama/naglowek_listy.py @@ -2,4 +2,4 @@ from ..base import WLElement class NaglowekListy(WLElement): - pass + HTML_TAG = "h3" diff --git a/src/librarian/elements/drama/naglowek_osoba.py b/src/librarian/elements/drama/naglowek_osoba.py index 076936b..5ab78fd 100644 --- a/src/librarian/elements/drama/naglowek_osoba.py +++ b/src/librarian/elements/drama/naglowek_osoba.py @@ -7,3 +7,4 @@ class NaglowekOsoba(WLElement): TXT_LEGACY_TOP_MARGIN = 3 TXT_LEGACY_BOTTOM_MARGIN = 0 + HTML_TAG = "h4" diff --git a/src/librarian/elements/drama/osoba.py b/src/librarian/elements/drama/osoba.py index fea8d60..b0fb793 100644 --- a/src/librarian/elements/drama/osoba.py +++ b/src/librarian/elements/drama/osoba.py @@ -2,5 +2,6 @@ from ..base import WLElement class Osoba(WLElement): - pass + HTML_TAG = "em" + HTML_CLASS = "person" diff --git a/src/librarian/elements/figures/__init__.py b/src/librarian/elements/figures/__init__.py index a0c464a..42b8015 100644 --- a/src/librarian/elements/figures/__init__.py +++ b/src/librarian/elements/figures/__init__.py @@ -1 +1,5 @@ +from .animacja import Animacja from .ilustr import Ilustr +from .tabela import Tabela +from .wiersz import Wiersz +from .kol import Kol diff --git a/src/librarian/elements/figures/animacja.py b/src/librarian/elements/figures/animacja.py new file mode 100644 index 0000000..e98fa88 --- /dev/null +++ b/src/librarian/elements/figures/animacja.py @@ -0,0 +1,12 @@ +from ..base import WLElement + + +class Animacja(WLElement): + HTML_TAG = 'div' + HTML_CLASS = "animacja cycle-slideshow" + HTML_ATTR = { + "data-cycle-pause-on-hover": "true", + "data-cycle-next": "> img", + "data-cycle-fx": "fadeout", + "data-cycle-paused": "true", + } diff --git a/src/librarian/elements/figures/ilustr.py b/src/librarian/elements/figures/ilustr.py index bd51453..143132e 100644 --- a/src/librarian/elements/figures/ilustr.py +++ b/src/librarian/elements/figures/ilustr.py @@ -6,5 +6,7 @@ class Ilustr(WLElement): def get_html_attr(self, builder): return { - 'src': builder.image_location + self.attrib['src'] + 'src': builder.image_location + self.attrib['src'], + 'alt': self.attr['alt'], + 'title': self.attr['alt'], } diff --git a/src/librarian/elements/figures/kol.py b/src/librarian/elements/figures/kol.py new file mode 100644 index 0000000..e0dae02 --- /dev/null +++ b/src/librarian/elements/figures/kol.py @@ -0,0 +1,5 @@ +from ..base import WLElement + + +class Kol(WLElement): + HTML_TAG = 'td' diff --git a/src/librarian/elements/figures/tabela.py b/src/librarian/elements/figures/tabela.py new file mode 100644 index 0000000..387041d --- /dev/null +++ b/src/librarian/elements/figures/tabela.py @@ -0,0 +1,12 @@ +from ..base import WLElement + + +class Tabela(WLElement): + HTML_TAG = 'table' + + def get_html_attr(self, builder): + if self.attrib['ramka'] == '1': + return { + 'class': 'border' + } + return {} diff --git a/src/librarian/elements/figures/wiersz.py b/src/librarian/elements/figures/wiersz.py new file mode 100644 index 0000000..bc61f9d --- /dev/null +++ b/src/librarian/elements/figures/wiersz.py @@ -0,0 +1,5 @@ +from ..base import WLElement + + +class Wiersz(WLElement): + HTML_TAG = 'tr' diff --git a/src/librarian/elements/footnotes/__init__.py b/src/librarian/elements/footnotes/__init__.py index eefe9db..82f6497 100644 --- a/src/librarian/elements/footnotes/__init__.py +++ b/src/librarian/elements/footnotes/__init__.py @@ -5,3 +5,63 @@ class Footnote(WLElement): def txt_build(self, builder): pass + def html_build(self, builder): + builder.footnote_counter += 1 + fn_no = builder.footnote_counter + footnote_id = 'footnote-idm{}'.format(self.attrib['_compat_ordered_id']) + anchor_id = 'anchor-idm{}'.format(self.attrib['_compat_ordered_id']) + + builder.start_element('a', {"href": '#{}'.format(footnote_id), "class": "annotation"}) + builder.push_text('[{}]'.format(fn_no)) + builder.end_element() + + builder.enter_fragment('footnotes') + builder.start_element('div', {'class': 'fn-{}'.format(self.tag)}) + builder.push_text('\n') # Compat + builder.start_element('a', {'name': footnote_id}) + builder.end_element() + builder.start_element('a', { + 'href': '#{}'.format(anchor_id), 'class': 'annotation' + }) + builder.push_text('[{}]'.format(fn_no)) + builder.end_element() + + builder.start_element('p') + super(Footnote, self).html_build(builder) + + builder.push_text(' [{}]'.format(self.qualifier)) + builder.end_element() + builder.end_element() + builder.exit_fragment() + + +class PA(Footnote): + """Przypis autorski.""" + @property + def qualifier(self): + _ = self.gettext + return _("author's footnote") + + +class PT(Footnote): + """Przypis tłumacza.""" + @property + def qualifier(self): + _ = self.gettext + return _("translator's footnote") + + +class PR(Footnote): + """Przypis redakcyjny.""" + @property + def qualifier(self): + _ = self.gettext + return _("editor's footnote") + + +class PE(Footnote): + """Przypis redakcji źródła.""" + @property + def qualifier(self): + _ = self.gettext + return _("source editor's footnote") diff --git a/src/librarian/elements/front/dzielo_nadrzedne.py b/src/librarian/elements/front/dzielo_nadrzedne.py index c53b3ad..a034ae7 100644 --- a/src/librarian/elements/front/dzielo_nadrzedne.py +++ b/src/librarian/elements/front/dzielo_nadrzedne.py @@ -4,3 +4,5 @@ from .base import HeaderElement class DzieloNadrzedne(HeaderElement): TXT_BOTTOM_MARGIN = 1 TXT_LEGACY_BOTTOM_MARGIN = 1 + + HTML_CLASS = "collection" diff --git a/src/librarian/elements/front/motto.py b/src/librarian/elements/front/motto.py index fd81220..98c7334 100644 --- a/src/librarian/elements/front/motto.py +++ b/src/librarian/elements/front/motto.py @@ -4,3 +4,6 @@ from ..base import WLElement class Motto(WLElement): TXT_LEGACY_TOP_MARGIN = 4 TXT_LEGACY_BOTTOM_MARGIN = 2 + + HTML_TAG = "div" + HTML_CLASS = "motto" diff --git a/src/librarian/elements/front/motto_podpis.py b/src/librarian/elements/front/motto_podpis.py index decbff3..58fc9db 100644 --- a/src/librarian/elements/front/motto_podpis.py +++ b/src/librarian/elements/front/motto_podpis.py @@ -2,4 +2,6 @@ from ..base import WLElement class MottoPodpis(WLElement): - pass + HTML_TAG = "p" + HTML_CLASS = "motto_podpis" + diff --git a/src/librarian/elements/front/nazwa_utworu.py b/src/librarian/elements/front/nazwa_utworu.py index 55ef78b..aa68082 100644 --- a/src/librarian/elements/front/nazwa_utworu.py +++ b/src/librarian/elements/front/nazwa_utworu.py @@ -5,5 +5,4 @@ class NazwaUtworu(HeaderElement): TXT_BOTTOM_MARGIN = 1 TXT_LEGACY_BOTTOM_MARGIN = 1 - HTML_TAG = 'span' HTML_CLASS = 'title' diff --git a/src/librarian/elements/headers/__init__.py b/src/librarian/elements/headers/__init__.py index 9ddf8d5..3389eec 100644 --- a/src/librarian/elements/headers/__init__.py +++ b/src/librarian/elements/headers/__init__.py @@ -1,3 +1,6 @@ from .naglowek_czesc import NaglowekCzesc from .naglowek_podrozdzial import NaglowekPodrozdzial from .naglowek_rozdzial import NaglowekRozdzial +from .podtytul_czesc import PodtytulCzesc +from .podtytul_rozdzial import PodtytulRozdzial +from .podtytul_podrozdzial import PodtytulPodrozdzial diff --git a/src/librarian/elements/headers/naglowek_czesc.py b/src/librarian/elements/headers/naglowek_czesc.py index 7b0781a..c7b2d9e 100644 --- a/src/librarian/elements/headers/naglowek_czesc.py +++ b/src/librarian/elements/headers/naglowek_czesc.py @@ -6,3 +6,5 @@ class NaglowekCzesc(WLElement): TXT_BOTTOM_MARGIN = 2 TXT_LEGACY_TOP_MARGIN = 5 TXT_LEGACY_BOTTOM_MARGIN = 0 + + HTML_TAG = "h2" diff --git a/src/librarian/elements/headers/naglowek_podrozdzial.py b/src/librarian/elements/headers/naglowek_podrozdzial.py index 6d3f85f..1cea01c 100644 --- a/src/librarian/elements/headers/naglowek_podrozdzial.py +++ b/src/librarian/elements/headers/naglowek_podrozdzial.py @@ -6,3 +6,5 @@ class NaglowekPodrozdzial(WLElement): TXT_BOTTOM_MARGIN = 2 TXT_LEGACY_TOP_MARGIN = 3 TXT_LEGACY_BOTTOM_MARGIN = 0 + + HTML_TAG = "h4" diff --git a/src/librarian/elements/headers/podtytul_czesc.py b/src/librarian/elements/headers/podtytul_czesc.py new file mode 100644 index 0000000..9825211 --- /dev/null +++ b/src/librarian/elements/headers/podtytul_czesc.py @@ -0,0 +1,9 @@ +from ..base import WLElement + + +class PodtytulCzesc(WLElement): + TXT_TOP_MARGIN = 2 + TXT_BOTTOM_MARGIN = 2 + + HTML_TAG = "div" + HTML_CLASS = "subtitle2" diff --git a/src/librarian/elements/headers/podtytul_podrozdzial.py b/src/librarian/elements/headers/podtytul_podrozdzial.py new file mode 100644 index 0000000..74aef13 --- /dev/null +++ b/src/librarian/elements/headers/podtytul_podrozdzial.py @@ -0,0 +1,9 @@ +from ..base import WLElement + + +class PodtytulPodrozdzial(WLElement): + TXT_TOP_MARGIN = 2 + TXT_BOTTOM_MARGIN = 2 + + HTML_TAG = "div" + HTML_CLASS = "subtitle4" diff --git a/src/librarian/elements/headers/podtytul_rozdzial.py b/src/librarian/elements/headers/podtytul_rozdzial.py new file mode 100644 index 0000000..675e19b --- /dev/null +++ b/src/librarian/elements/headers/podtytul_rozdzial.py @@ -0,0 +1,9 @@ +from ..base import WLElement + + +class PodtytulRozdzial(WLElement): + TXT_TOP_MARGIN = 2 + TXT_BOTTOM_MARGIN = 2 + + HTML_TAG = "div" + HTML_CLASS = "subtitle3" diff --git a/src/librarian/elements/paragraphs/akap.py b/src/librarian/elements/paragraphs/akap.py index 836671b..0a76c52 100644 --- a/src/librarian/elements/paragraphs/akap.py +++ b/src/librarian/elements/paragraphs/akap.py @@ -11,5 +11,3 @@ class Akap(WLElement): HTML_TAG = 'p' HTML_CLASS = 'paragraph' - - HTML_SECTION = True diff --git a/src/librarian/elements/poetry/__init__.py b/src/librarian/elements/poetry/__init__.py index 80fdc4f..4784ad0 100644 --- a/src/librarian/elements/poetry/__init__.py +++ b/src/librarian/elements/poetry/__init__.py @@ -1,5 +1,7 @@ from .strofa import Strofa +from .wers_akap import WersAkap from .wers_cd import WersCd +from .wers_do_prawej import WersDoPrawej from .wers import Wers from .wers_wciety import WersWciety from .zastepnik_wersu import ZastepnikWersu diff --git a/src/librarian/elements/poetry/strofa.py b/src/librarian/elements/poetry/strofa.py index 2d3a4c9..7df549f 100644 --- a/src/librarian/elements/poetry/strofa.py +++ b/src/librarian/elements/poetry/strofa.py @@ -9,6 +9,9 @@ class Strofa(WLElement): TXT_LEGACY_TOP_MARGIN = 1 TXT_LEGACY_BOTTOM_MARGIN = 0 + HTML_TAG = 'div' + HTML_CLASS = 'stanza' + def get_verses(self): from librarian.parser import parser @@ -38,6 +41,7 @@ class Strofa(WLElement): verses[-1].append(child) for verse in verses: + verse.stanza = self if len(verse) == 1 and isinstance(verse[0], Wers): assert not (verse.text or '').strip() assert not (verse[0].tail or '').strip() diff --git a/src/librarian/elements/poetry/wers.py b/src/librarian/elements/poetry/wers.py index e164b1d..5c28058 100644 --- a/src/librarian/elements/poetry/wers.py +++ b/src/librarian/elements/poetry/wers.py @@ -10,4 +10,10 @@ class Wers(WLElement): TXT_LEGACY_BOTTOM_MARGIN = 0 HTML_TAG = 'div' - HTML_ATTRIB = {"class": "verse"} + HTML_CLASS = 'verse' + + @property + def meta(self): + if hasattr(self, 'stanza'): + return self.stanza.meta + return super(Wers, self).meta diff --git a/src/librarian/elements/poetry/wers_akap.py b/src/librarian/elements/poetry/wers_akap.py new file mode 100644 index 0000000..03b8187 --- /dev/null +++ b/src/librarian/elements/poetry/wers_akap.py @@ -0,0 +1,9 @@ +from .wers import Wers + + +class WersAkap(Wers): + TXT_PREFIX = ' ' + + HTML_ATTR = { + "style": "padding-left: 1em" + } diff --git a/src/librarian/elements/poetry/wers_cd.py b/src/librarian/elements/poetry/wers_cd.py index 7a14938..a61d5bc 100644 --- a/src/librarian/elements/poetry/wers_cd.py +++ b/src/librarian/elements/poetry/wers_cd.py @@ -4,3 +4,7 @@ class WersCd(Wers): def _txt_build_inner(self, builder): builder.push_text(' ' * 24, prepared=True) super(WersCd, self)._txt_build_inner(builder) + + HTML_ATTR = { + "style": "padding-left: 12em", + } diff --git a/src/librarian/elements/poetry/wers_do_prawej.py b/src/librarian/elements/poetry/wers_do_prawej.py new file mode 100644 index 0000000..9ab5ff0 --- /dev/null +++ b/src/librarian/elements/poetry/wers_do_prawej.py @@ -0,0 +1,9 @@ +from .wers import Wers + + +class WersDoPrawej(Wers): + TXT_PREFIX = ' ' + + HTML_ATTR = { + "style": "text-align: right", + } diff --git a/src/librarian/elements/poetry/wers_wciety.py b/src/librarian/elements/poetry/wers_wciety.py index 3e9bb6f..8ac2bb3 100644 --- a/src/librarian/elements/poetry/wers_wciety.py +++ b/src/librarian/elements/poetry/wers_wciety.py @@ -4,13 +4,17 @@ from .wers import Wers class WersWciety(Wers): @property def typ(self): - ## Temporary legacy compatibility fix. - return 2 if 'typ' in self.attrib else 1 - v = self.attrib.get('typ') return int(v) if v else 1 def _txt_build_inner(self, builder): + ## Temporary legacy compatibility fix. + typ = min(self.typ, 2) + builder.push_text(' ' * self.typ, prepared=True) super(WersWciety, self)._txt_build_inner(builder) + def get_html_attr(self, builder): + attr = super(WersWciety, self).get_html_attr(builder) + attr['style'] = "padding-left: {}em".format(self.typ) + return attr diff --git a/src/librarian/elements/root/__init__.py b/src/librarian/elements/root/__init__.py index a8cf82d..8e624bd 100644 --- a/src/librarian/elements/root/__init__.py +++ b/src/librarian/elements/root/__init__.py @@ -17,3 +17,9 @@ class Utwor(WLElement): # This should not generally happen. if self.getparent() is not None: return self.getparent().meta + + @property + def master(self): + for c in self: + if isinstance(c, Master): + return c diff --git a/src/librarian/elements/separators/sekcja_asterysk.py b/src/librarian/elements/separators/sekcja_asterysk.py index c11b9d0..e68430d 100644 --- a/src/librarian/elements/separators/sekcja_asterysk.py +++ b/src/librarian/elements/separators/sekcja_asterysk.py @@ -7,5 +7,12 @@ class SekcjaAsterysk(WLElement): TXT_LEGACY_TOP_MARGIN = 2 TXT_LEGACY_BOTTOM_MARGIN = 2 + HTML_TAG = "p" + HTML_CLASS = "spacer-asterisk" + def _txt_build_inner(self, builder): builder.push_text('*') + + def _html_build_inner(self, builder): + builder.push_text("*") + diff --git a/src/librarian/elements/separators/sekcja_swiatlo.py b/src/librarian/elements/separators/sekcja_swiatlo.py index 1526548..7d950da 100644 --- a/src/librarian/elements/separators/sekcja_swiatlo.py +++ b/src/librarian/elements/separators/sekcja_swiatlo.py @@ -5,3 +5,5 @@ class SekcjaSwiatlo(WLElement): TXT_BOTTOM_MARGIN = 6 TXT_LEGACY_BOTTOM_MARGIN = 4 + HTML_TAG = "hr" + HTML_CLASS = "spacer" diff --git a/src/librarian/elements/separators/separator_linia.py b/src/librarian/elements/separators/separator_linia.py index 7587785..5249691 100644 --- a/src/librarian/elements/separators/separator_linia.py +++ b/src/librarian/elements/separators/separator_linia.py @@ -7,5 +7,10 @@ class SeparatorLinia(WLElement): TXT_LEGACY_TOP_MARGIN = 2 TXT_LEGACY_BOTTOM_MARGIN = 2 + HTML_TAG = "hr" + HTML_CLASS = "spacer-line" + def _txt_build_inner(self, builder): builder.push_text('-' * 48) + + diff --git a/src/librarian/elements/styles/__init__.py b/src/librarian/elements/styles/__init__.py index 40afc01..9ff5118 100644 --- a/src/librarian/elements/styles/__init__.py +++ b/src/librarian/elements/styles/__init__.py @@ -1,3 +1,7 @@ +from .indeks_dolny import IndeksDolny +from .mat import Mat from .slowo_obce import SlowoObce from .tytul_dziela import TytulDziela +from .wieksze_odstepy import WiekszeOdstepy from .wyroznienie import Wyroznienie +from .www import WWW diff --git a/src/librarian/elements/styles/indeks_dolny.py b/src/librarian/elements/styles/indeks_dolny.py new file mode 100644 index 0000000..5d19a44 --- /dev/null +++ b/src/librarian/elements/styles/indeks_dolny.py @@ -0,0 +1,7 @@ +from ..base import WLElement + + +class IndeksDolny(WLElement): + TXT_PREFIX = "_" + + HTML_TAG = "sub" diff --git a/src/librarian/elements/styles/mat.py b/src/librarian/elements/styles/mat.py new file mode 100644 index 0000000..f284695 --- /dev/null +++ b/src/librarian/elements/styles/mat.py @@ -0,0 +1,10 @@ +from copy import copy +from ..base import WLElement + + +class Mat(WLElement): + def html_build(self, builder): + e = copy(self) + e.tag = 'math' + e.attrib['xmlns'] = 'http://www.w3.org/1998/Math/MathML' + builder.cursor.append(e) diff --git a/src/librarian/elements/styles/slowo_obce.py b/src/librarian/elements/styles/slowo_obce.py index 537f7c6..3592a1e 100644 --- a/src/librarian/elements/styles/slowo_obce.py +++ b/src/librarian/elements/styles/slowo_obce.py @@ -2,4 +2,5 @@ from ..base import WLElement class SlowoObce(WLElement): - pass + HTML_TAG = 'em' + HTML_CLASS = 'foreign-word' diff --git a/src/librarian/elements/styles/tytul_dziela.py b/src/librarian/elements/styles/tytul_dziela.py index b6c3662..ef3618c 100644 --- a/src/librarian/elements/styles/tytul_dziela.py +++ b/src/librarian/elements/styles/tytul_dziela.py @@ -3,6 +3,9 @@ from ..base import WLElement class TytulDziela(WLElement): + HTML_TAG = 'em' + HTML_CLASS = 'book-title' + def normalize_text(self, text): txt = super(TytulDziela, self).normalize_text(text) if self.attrib.get('typ') == '1': diff --git a/src/librarian/elements/styles/wieksze_odstepy.py b/src/librarian/elements/styles/wieksze_odstepy.py new file mode 100644 index 0000000..3229402 --- /dev/null +++ b/src/librarian/elements/styles/wieksze_odstepy.py @@ -0,0 +1,9 @@ +from ..base import WLElement + + +class WiekszeOdstepy(WLElement): + TXT_PREFIX = "*" + TXT_SUFFIX = "*" + + HTML_TAG = "em" + HTML_CLASS = "wieksze-odstepy" diff --git a/src/librarian/elements/styles/www.py b/src/librarian/elements/styles/www.py new file mode 100644 index 0000000..e15f712 --- /dev/null +++ b/src/librarian/elements/styles/www.py @@ -0,0 +1,5 @@ +from ..base import WLElement + + +class WWW(WLElement): + pass diff --git a/src/librarian/elements/styles/wyroznienie.py b/src/librarian/elements/styles/wyroznienie.py index dce6936..c76b4cf 100644 --- a/src/librarian/elements/styles/wyroznienie.py +++ b/src/librarian/elements/styles/wyroznienie.py @@ -5,3 +5,5 @@ class Wyroznienie(WLElement): TXT_PREFIX = "*" TXT_SUFFIX = "*" + HTML_TAG = "em" + HTML_CLASS = "author-emphasis" diff --git a/src/librarian/elements/themes/end.py b/src/librarian/elements/themes/end.py index a6eb9e0..32c6f36 100644 --- a/src/librarian/elements/themes/end.py +++ b/src/librarian/elements/themes/end.py @@ -2,4 +2,11 @@ from ..base import WLElement class End(WLElement): - pass + HTML_TAG = 'span' + + def get_html_attr(self, builder): + fid = self.attrib['id'][1:] + return { + "class": "theme-end", + "fid": fid + } diff --git a/src/librarian/elements/themes/motyw.py b/src/librarian/elements/themes/motyw.py index 51042c6..6ec6c53 100644 --- a/src/librarian/elements/themes/motyw.py +++ b/src/librarian/elements/themes/motyw.py @@ -2,13 +2,15 @@ from ..base import WLElement class Motyw(WLElement): + HTML_TAG = "a" + def txt_build(self, builder): pass - - def feed_to(self, builder): - assert not len(self) - themes = [ - normalize_text(t.strip()) for t in self.text.split(',') - ] - builder.set_themes(self.attrib['id'], themes) + def get_html_attr(self, builder): + fid = self.attrib['id'][1:] + return { + "class": "theme-begin", + "fid": fid, + "name": "m" + fid, + } diff --git a/src/librarian/html.py b/src/librarian/html.py index 78f3dad..9ec6583 100644 --- a/src/librarian/html.py +++ b/src/librarian/html.py @@ -183,6 +183,8 @@ def extract_fragments(input_filename): while parent.get('id', None) != 'book-text': cparent = copy.deepcopy(parent) cparent.text = None + if 'id' in cparent.attrib: + del cparent.attrib['id'] parents.append(cparent) parent = parent.getparent() @@ -222,8 +224,11 @@ def extract_fragments(input_filename): ) else: for fragment_id in open_fragments: + celem = copy.copy(element) + if 'id' in celem.attrib: + del celem.attrib['id'] open_fragments[fragment_id].append( - event, copy.copy(element) + event, celem ) return closed_fragments, open_fragments diff --git a/src/librarian/locale/pl/LC_MESSAGES/messages.mo b/src/librarian/locale/pl/LC_MESSAGES/messages.mo new file mode 100644 index 0000000000000000000000000000000000000000..1246851f96253d07d1b6cd4f2ff5f52fa1b8b93a GIT binary patch literal 659 zcmZXR&rjPh6vxfR1_W&yNJBg9-~$o|xjePNIEaN z_2I_GydzT@@}0393?=_+lgZ?J?M6QOh9x>HWJaP(@)Sos#4*oDLF=%!H|;KJgu#x{ zo-__Ab9CY9xBrxnq|%!_=}YU#Zxy~LHl}q|Gd;3mPbVf-I*a+niSofOaX^}^#EZtf zZK@~a&Bo_|we7gfl`C0(KjHWh#=4<;Ojt_gFewLmT0?1B7x}Bd#cAWQb`nZlo81AD CV6dnF literal 0 HcmV?d00001 diff --git a/src/librarian/locale/pl/LC_MESSAGES/messages.po b/src/librarian/locale/pl/LC_MESSAGES/messages.po new file mode 100644 index 0000000..e06828f --- /dev/null +++ b/src/librarian/locale/pl/LC_MESSAGES/messages.po @@ -0,0 +1,42 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: \n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: \n" +"PO-Revision-Date: 2020-10-07 12:11+0200\n" +"Last-Translator: Radek Czajka \n" +"Language-Team: \n" +"Language: pl\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: Poedit 2.3\n" + +#: src/librarian/builders/html.py:77 +msgid "translated by" +msgstr "tłum." + +#: src/librarian/builders/html.py:94 +msgid "Footnotes" +msgstr "Przypisy" + +#: src/librarian/elements/footnotes/__init__.py:43 +msgid "author's footnote" +msgstr "przypis autorski" + +#: src/librarian/elements/footnotes/__init__.py:51 +msgid "translator's footnote" +msgstr "przypis tłumacza" + +#: src/librarian/elements/footnotes/__init__.py:59 +msgid "editor's footnote" +msgstr "przypis redakcyjny" + +#: src/librarian/elements/footnotes/__init__.py:67 +msgid "source editor's footnote" +msgstr "przypis edytorski" diff --git a/src/librarian/util.py b/src/librarian/util.py index 5c9fbc2..2c6b773 100644 --- a/src/librarian/util.py +++ b/src/librarian/util.py @@ -128,3 +128,14 @@ def roman_to_int(input): def makedirs(path): if not os.path.isdir(path): os.makedirs(path) + + +def get_translation(language): + import gettext + from .functions import lang_code_3to2 + + return gettext.translation( + 'messages', + localedir=os.path.join(os.path.dirname(__file__), 'locale'), + languages=[lang_code_3to2(language)], + ) diff --git a/tests/files/text/asnyk_miedzy_nami_expected.html b/tests/files/text/asnyk_miedzy_nami_expected.html index dce71bb..02b9a2f 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected.html +++ b/tests/files/text/asnyk_miedzy_nami_expected.html @@ -1,38 +1,38 @@ -
+

Spis treści

      -
    1. Miłość platoniczna: 1 2
    2. +
    3. Miłość platoniczna: 1
    4. Natura: 1
    5. Nicość: 1

    -Adam AsnykMiłość platonicznaMiędzy nami nic nie było +Adam AsnykMiłość platonicznaMiędzy nami nic nie było

    -Miłość platoniczna
    - 1
    Między nami Nicośćnic nie było! +
    + 1
    Między nami Nicośćnic nie było!
    Żadnych zwierzeń, wyznań żadnych!
    Nic nas z sobą nie łączyło —
    Prócz wiosennych marzeń zdradnych;
    -
    - 5
    -NaturaPrócz tych woni, barw i blasków,
    +
    + 5
    +NaturaPrócz tych woni, barw i blasków,
    Unoszących się w przestrzeni;
    Prócz szumiących śpiewem lasków
    I tej świeżej łąk zieleni;
    -
    -
    Prócz tych kaskad i potoków,
    +
    +
    Prócz tych kaskad i potoków,
    10
    Zraszających każdy parów,
    Prócz girlandy tęcz, obłoków,
    Prócz natury słodkich czarów;
    -
    -
    Prócz tych wspólnych, jasnych zdrojów,
    +
    +
    Prócz tych wspólnych, jasnych zdrojów,
    Z których serce zachwyt piło;
    15
    Prócz pierwiosnków i powojów,—
    Między nami nic nie było! diff --git a/tests/files/text/asnyk_miedzy_nami_expected.legacy.html b/tests/files/text/asnyk_miedzy_nami_expected.legacy.html new file mode 100644 index 0000000..dce71bb --- /dev/null +++ b/tests/files/text/asnyk_miedzy_nami_expected.legacy.html @@ -0,0 +1,41 @@ +
    +
    +

    Spis treści

    +
      +
      +
        +
      1. Miłość platoniczna: 1 2
      2. +
      3. Natura: 1
      4. +
      5. Nicość: 1
      6. +
      +

      +Adam AsnykMiłość platonicznaMiędzy nami nic nie było +

      +Miłość platoniczna
      + 1
      Między nami Nicośćnic nie było! +
      +
      Żadnych zwierzeń, wyznań żadnych!
      +
      Nic nas z sobą nie łączyło —
      +
      Prócz wiosennych marzeń zdradnych;
      +
      +
      + 5
      +NaturaPrócz tych woni, barw i blasków,
      +
      Unoszących się w przestrzeni;
      +
      Prócz szumiących śpiewem lasków
      +
      I tej świeżej łąk zieleni;
      +
      +
      +
      Prócz tych kaskad i potoków,
      + 10
      Zraszających każdy parów,
      +
      Prócz girlandy tęcz, obłoków,
      +
      Prócz natury słodkich czarów;
      +
      +
      +
      Prócz tych wspólnych, jasnych zdrojów,
      +
      Z których serce zachwyt piło;
      + 15
      Prócz pierwiosnków i powojów,—
      +
      Między nami nic nie było! +
      +
      +
      diff --git a/tests/files/text/asnyk_miedzy_nami_fragments.html b/tests/files/text/asnyk_miedzy_nami_fragments.html index ac1658e..2a5713c 100644 --- a/tests/files/text/asnyk_miedzy_nami_fragments.html +++ b/tests/files/text/asnyk_miedzy_nami_fragments.html @@ -4,6 +4,8 @@ 1189062500041: Miłość platoniczna +

      Między nami nic nie było +

      Między nami nic nie było!
      diff --git a/tests/test_html.py b/tests/test_html.py index 7846376..36651fa 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -8,7 +8,8 @@ from __future__ import unicode_literals import io from unittest import TestCase from librarian import NoDublinCore -from librarian.parser import WLDocument +from librarian.document import WLDocument +from librarian.parser import WLDocument as LegacyWLDocument from nose.tools import * from .utils import get_fixture @@ -16,33 +17,41 @@ from .utils import get_fixture class TransformTest(TestCase): maxDiff = None - def test_transform(self): - expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html') + def test_transform_legacy(self): + expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.legacy.html') - html = WLDocument.from_file( + html = LegacyWLDocument.from_file( get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') ).as_html().get_bytes().decode('utf-8') self.assertEqual(html, io.open(expected_output_file_path).read()) + def test_transform(self): + expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.html') + html = WLDocument( + filename=get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') + ).build('html').get_bytes().decode('utf-8') + + self.assertEqual(html, io.open(expected_output_file_path).read()) + @raises(NoDublinCore) def test_no_dublincore(): - WLDocument.from_file( + LegacyWLDocument.from_file( get_fixture('text', 'asnyk_miedzy_nami_nodc.xml') ).as_html() def test_passing_parse_dublincore_to_transform(): """Passing parse_dublincore=False to transform omits DublinCore parsing.""" - WLDocument.from_file( + LegacyWLDocument.from_file( get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), parse_dublincore=False, ).as_html() def test_empty(): - assert not WLDocument.from_bytes( + assert not LegacyWLDocument.from_bytes( b'', parse_dublincore=False, ).as_html() -- 2.20.1