From 55fbc767e5592cad565cff0d16b9638d01b6706f Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Mon, 16 Sep 2024 12:33:02 +0200 Subject: [PATCH] Text converter updated. --- setup.py | 2 +- src/librarian/builders/txt.py | 27 +++++---------- src/librarian/elements/__init__.py | 14 ++++---- src/librarian/elements/base.py | 33 +++++++++++-------- src/librarian/elements/blocks/dedykacja.py | 3 +- src/librarian/elements/blocks/dlugi_cytat.py | 4 +-- src/librarian/elements/blocks/poezja_cyt.py | 2 -- src/librarian/elements/drama/didaskalia.py | 2 -- src/librarian/elements/drama/lista_osob.py | 2 -- src/librarian/elements/drama/lista_osoba.py | 2 -- .../elements/drama/naglowek_osoba.py | 2 -- src/librarian/elements/figures/kol.py | 1 + src/librarian/elements/figures/tabela.py | 4 +++ src/librarian/elements/figures/wiersz.py | 3 ++ src/librarian/elements/front/autor_utworu.py | 1 - .../elements/front/dzielo_nadrzedne.py | 1 - src/librarian/elements/front/motto.py | 4 +-- src/librarian/elements/front/nazwa_utworu.py | 1 - src/librarian/elements/front/podtytul.py | 1 - .../elements/headers/naglowek_czesc.py | 2 -- .../elements/headers/naglowek_podrozdzial.py | 2 -- .../elements/headers/naglowek_rozdzial.py | 2 -- .../elements/headers/naglowek_scena.py | 2 -- src/librarian/elements/masters/__init__.py | 2 +- src/librarian/elements/paragraphs/akap.py | 2 -- src/librarian/elements/poetry/strofa.py | 2 -- src/librarian/elements/poetry/wers.py | 2 -- .../elements/separators/sekcja_asterysk.py | 2 -- .../elements/separators/sekcja_swiatlo.py | 1 - .../elements/separators/separator_linia.py | 2 -- src/librarian/elements/styles/__init__.py | 2 +- src/librarian/elements/styles/mat.py | 30 +++++++++++++++++ src/librarian/elements/tools/__init__.py | 10 ++++++ tests/files/tags/didaskalia/1.expected.html | 4 +-- tests/files/tags/kwestia/1.expected.html | 2 +- tests/files/tags/kwestia/2.expected.html | 4 +-- tests/files/tags/motto_podpis/1.expected.html | 2 +- .../files/tags/naglowek_osoba/1.expected.html | 2 +- tests/files/tags/nota/2.expected.html | 2 +- tests/files/tags/nota/3.expected.html | 4 +-- tests/files/tags/poezja_cyt/1.expected.html | 2 +- tests/files/tags/slowo_obce/1.expected.html | 2 +- tests/files/tags/tabela/1.expected.html | 32 +++++++++--------- tests/files/tags/tytul_dziela/1.expected.html | 2 +- tests/files/tags/wers_wciety/1.expected.html | 2 +- tests/files/tags/wers_wciety/2.expected.html | 18 +++++----- tests/files/tags/wers_wciety/2.xml | 14 ++++---- tests/files/tags/wyroznienie/1.expected.html | 2 +- .../text/asnyk_miedzy_nami_expected_raw.txt | 2 -- tests/test_text.py | 31 ++--------------- 50 files changed, 142 insertions(+), 157 deletions(-) diff --git a/setup.py b/setup.py index 184db52..2af0a05 100755 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ setup( "librarian=librarian.command_line:main" ] }, - scripts=['scripts/book2html', + scripts=[ 'scripts/book2txt', 'scripts/book2pdf', 'scripts/book2fb2', diff --git a/src/librarian/builders/txt.py b/src/librarian/builders/txt.py index 8302de5..28fe709 100644 --- a/src/librarian/builders/txt.py +++ b/src/librarian/builders/txt.py @@ -15,14 +15,6 @@ class TxtFragment: self.current_margin = 0 self.starting_block = True - def push_legacy_margin(self, margin): - if margin: - if self.pieces: - self.pieces[-1] = self.pieces[-1].rstrip(' ') - self.pieces.append('\r\n' * margin) - self.current_margin += margin - self.starting_block = True - def push_margin(self, margin): if margin: if self.pieces: @@ -47,8 +39,11 @@ class TxtBuilder: """ file_extension = "txt" identifier = "txt" + after_child_fn = 'txt_after_child' + debug = False orphans = False + normalize_whitespace = True default_license_description = { "pol": ( @@ -107,9 +102,6 @@ class TxtBuilder: def push_margin(self, margin): self.current_fragments[-1].push_margin(margin) - def push_legacy_margin(self, margin, where=None): - self.current_fragments[-1].push_legacy_margin(margin) - def build(self, document, raw_text=False, **kwargs): document.tree.getroot().txt_build(self) meta = document.meta @@ -123,27 +115,26 @@ class TxtBuilder: for translator in meta.translators ) ) - #builder.push_margin(2) - self.push_legacy_margin(1) + builder.push_margin(2) if meta.isbn_txt: - #builder.push_margin(2) - self.push_legacy_margin(1) + self.push_margin(2) isbn = meta.isbn_txt if isbn.startswith(('ISBN-' , 'ISBN ')): isbn = isbn[5:] self.push_text('ISBN {isbn}'.format(isbn=isbn)) #builder.push_margin(5) - #builder.push_margin(4) - self.push_legacy_margin(1) + self.push_margin(4) self.exit_fragment() - text = ''.join(self.fragments['header'].pieces) + ''.join(self.fragments[None].pieces) + text = ''.join(self.fragments[None].pieces).lstrip() if raw_text: result = text else: + text = ''.join(self.fragments['header'].pieces) + text + if meta.license: license_description = self.license_description['pol'].format(meta=meta) else: diff --git a/src/librarian/elements/__init__.py b/src/librarian/elements/__init__.py index b08c3e1..0191ae5 100644 --- a/src/librarian/elements/__init__.py +++ b/src/librarian/elements/__init__.py @@ -132,11 +132,11 @@ WL_ELEMENTS = { "wywiad_odp": blocks.WywiadOdp, # Inline MathML, should really be namespaced. - "mrow": etree.ElementBase, - "mi": etree.ElementBase, - "mo": etree.ElementBase, - "msup": etree.ElementBase, - "mn": etree.ElementBase, - "mfrac": etree.ElementBase, - "mfenced": etree.ElementBase, + "mrow": styles.MRow, + "mi": styles.M, + "mo": styles.M, + "msup": styles.MSup, + "mn": styles.M, + "mfrac": styles.MFrac, + "mfenced": styles.MFenced, } diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py index 2349f16..060449c 100644 --- a/src/librarian/elements/base.py +++ b/src/librarian/elements/base.py @@ -45,12 +45,11 @@ class WLElement(etree.ElementBase): text_substitutions = [ ('---', '—'), ('--', '–'), - #('...', '…'), # Temporary turnoff for epub + ('...', '…'), (',,', '„'), ('"', '”'), ('\ufeff', ''), - - ("'", "\u2019"), # This was enabled for epub. + ("'", "\u2019"), ] @property @@ -118,9 +117,9 @@ class WLElement(etree.ElementBase): text = text or '' for e, s in self.text_substitutions: text = text.replace(e, s) - # FIXME: TEmporary turnoff -# text = re.sub(r'\s+', ' ', text) -### TODO: Added now for epub + + if getattr(builder, 'normalize_whitespace', False): + text = re.sub(r'\s+', ' ', text) if getattr(builder, 'hyphenator', None) is not None: newt = '' @@ -144,8 +143,12 @@ class WLElement(etree.ElementBase): text = text.rstrip() builder.push_text(text) for i, child in enumerate(self): + real_child_count = 0 if isinstance(child, WLElement): getattr(child, build_method)(builder) + self.after_child(builder, real_child_count) + real_child_count += 1 + # FIXME base builder api elif getattr(builder, 'debug', False) and child.tag is etree.Comment: builder.process_comment(child) @@ -155,21 +158,23 @@ class WLElement(etree.ElementBase): text = text.rstrip() builder.push_text(text) + def after_child(self, builder, child_count): + fn = getattr(builder, 'after_child_fn', None) + if fn: + getattr(self, builder.after_child_fn)(builder, child_count) + + def txt_after_child(self, builder, child_count): + pass + def _txt_build_inner(self, builder): self._build_inner(builder, 'txt_build') def txt_build(self, builder): - if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'): - builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN) - else: - builder.push_margin(self.TXT_TOP_MARGIN) + builder.push_margin(self.TXT_TOP_MARGIN) builder.push_text(self.TXT_PREFIX, True) self._txt_build_inner(builder) builder.push_text(self.TXT_SUFFIX, True) - if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'): - builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN) - else: - builder.push_margin(self.TXT_BOTTOM_MARGIN) + builder.push_margin(self.TXT_BOTTOM_MARGIN) def _html_build_inner(self, builder): self._build_inner(builder, 'html_build') diff --git a/src/librarian/elements/blocks/dedykacja.py b/src/librarian/elements/blocks/dedykacja.py index 265b268..ae00b72 100644 --- a/src/librarian/elements/blocks/dedykacja.py +++ b/src/librarian/elements/blocks/dedykacja.py @@ -7,7 +7,8 @@ from ..base import WLElement class Dedykacja(WLElement): NUMBERING = 'i' - TXT_LEGACY_TOP_MARGIN = 2 + TXT_TOP_MARGIN = 2 + TXT_BOTTOM_MARGIN = 4 EPUB_TAG = HTML_TAG = "div" EPUB_CLASS = HTML_CLASS = "dedication" diff --git a/src/librarian/elements/blocks/dlugi_cytat.py b/src/librarian/elements/blocks/dlugi_cytat.py index 0a55833..bdac225 100644 --- a/src/librarian/elements/blocks/dlugi_cytat.py +++ b/src/librarian/elements/blocks/dlugi_cytat.py @@ -8,9 +8,7 @@ class DlugiCytat(WLElement): CAN_HAVE_TEXT = False TXT_TOP_MARGIN = 3 - TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 0 + TXT_BOTTOM_MARGIN = 3 HTML_TAG = 'blockquote' diff --git a/src/librarian/elements/blocks/poezja_cyt.py b/src/librarian/elements/blocks/poezja_cyt.py index 2b351ca..dd26d13 100644 --- a/src/librarian/elements/blocks/poezja_cyt.py +++ b/src/librarian/elements/blocks/poezja_cyt.py @@ -9,8 +9,6 @@ class PoezjaCyt(WLElement): TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 3 - TXT_LEGACY_TOP_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 0 HTML_TAG = 'blockquote' diff --git a/src/librarian/elements/drama/didaskalia.py b/src/librarian/elements/drama/didaskalia.py index 8f461dd..b6b53f9 100644 --- a/src/librarian/elements/drama/didaskalia.py +++ b/src/librarian/elements/drama/didaskalia.py @@ -9,8 +9,6 @@ class Didaskalia(WLElement): TXT_TOP_PARGIN = 2 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 2 - TXT_LEGACY_BOTTOM_MARGIN = 0 TXT_PREFIX = "/ " TXT_SUFFIX = " /" diff --git a/src/librarian/elements/drama/lista_osob.py b/src/librarian/elements/drama/lista_osob.py index f9e210d..d8ea32a 100644 --- a/src/librarian/elements/drama/lista_osob.py +++ b/src/librarian/elements/drama/lista_osob.py @@ -11,8 +11,6 @@ class ListaOsob(WLElement): TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 3 - TXT_LEGACY_TOP_MARGIN = 3 - TXT_LEGACY_BOTTOM_MARGIN = 1 HTML_TAG = "div" HTML_CLASS = "person-list" diff --git a/src/librarian/elements/drama/lista_osoba.py b/src/librarian/elements/drama/lista_osoba.py index 2341918..7319321 100644 --- a/src/librarian/elements/drama/lista_osoba.py +++ b/src/librarian/elements/drama/lista_osoba.py @@ -9,8 +9,6 @@ class ListaOsoba(WLElement): TXT_TOP_MARGIN = 1 TXT_BOTTOM_MARGIN = 1 - TXT_LEGACY_TOP_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 0 TXT_PREFIX = " * " EPUB_TAG = HTML_TAG = "li" diff --git a/src/librarian/elements/drama/naglowek_osoba.py b/src/librarian/elements/drama/naglowek_osoba.py index 43aa1d3..6ff6761 100644 --- a/src/librarian/elements/drama/naglowek_osoba.py +++ b/src/librarian/elements/drama/naglowek_osoba.py @@ -9,8 +9,6 @@ class NaglowekOsoba(WLElement): TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 3 - TXT_LEGACY_BOTTOM_MARGIN = 0 HTML_TAG = "h4" diff --git a/src/librarian/elements/figures/kol.py b/src/librarian/elements/figures/kol.py index 9b425de..fa09fc7 100644 --- a/src/librarian/elements/figures/kol.py +++ b/src/librarian/elements/figures/kol.py @@ -6,3 +6,4 @@ from ..base import WLElement class Kol(WLElement): EPUB_TAG = HTML_TAG = 'td' + TXT_PREFIX = ' ' * 4 diff --git a/src/librarian/elements/figures/tabela.py b/src/librarian/elements/figures/tabela.py index 783fcb6..5ab04e0 100644 --- a/src/librarian/elements/figures/tabela.py +++ b/src/librarian/elements/figures/tabela.py @@ -6,6 +6,10 @@ from ..base import WLElement class Tabela(WLElement): NUMBERING = 'i' + CAN_HAVE_TEXT = False + + TXT_TOP_MARGIN = 3 + TXT_BOTTOM_MARGIN = 3 EPUB_TAG = HTML_TAG = 'table' diff --git a/src/librarian/elements/figures/wiersz.py b/src/librarian/elements/figures/wiersz.py index c5d355a..c013558 100644 --- a/src/librarian/elements/figures/wiersz.py +++ b/src/librarian/elements/figures/wiersz.py @@ -5,4 +5,7 @@ from ..base import WLElement class Wiersz(WLElement): + CAN_HAVE_TEXT = False EPUB_TAG = HTML_TAG = 'tr' + TXT_TOP_MARGIN = 1 + TXT_BOTTOM_MARGIN = 1 diff --git a/src/librarian/elements/front/autor_utworu.py b/src/librarian/elements/front/autor_utworu.py index 3e35b0b..0f71cb5 100644 --- a/src/librarian/elements/front/autor_utworu.py +++ b/src/librarian/elements/front/autor_utworu.py @@ -6,7 +6,6 @@ from .base import HeaderElement class AutorUtworu(HeaderElement): TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_BOTTOM_MARGIN = 2 HTML_CLASS = 'wl author' diff --git a/src/librarian/elements/front/dzielo_nadrzedne.py b/src/librarian/elements/front/dzielo_nadrzedne.py index cc70164..cc16a81 100644 --- a/src/librarian/elements/front/dzielo_nadrzedne.py +++ b/src/librarian/elements/front/dzielo_nadrzedne.py @@ -6,7 +6,6 @@ from .base import HeaderElement class DzieloNadrzedne(HeaderElement): TXT_BOTTOM_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 1 HTML_CLASS = "wl collection" diff --git a/src/librarian/elements/front/motto.py b/src/librarian/elements/front/motto.py index a6769b8..ac9f209 100644 --- a/src/librarian/elements/front/motto.py +++ b/src/librarian/elements/front/motto.py @@ -7,8 +7,8 @@ from ..base import WLElement class Motto(WLElement): NUMBERING = 'i' - TXT_LEGACY_TOP_MARGIN = 4 - TXT_LEGACY_BOTTOM_MARGIN = 2 + TXT_TOP_MARGIN = 4 + TXT_BOTTOM_MARGIN = 2 EPUB_TAG = HTML_TAG = "div" EPUB_CLASS = HTML_CLASS = "motto" diff --git a/src/librarian/elements/front/nazwa_utworu.py b/src/librarian/elements/front/nazwa_utworu.py index dd40266..9e3bd51 100644 --- a/src/librarian/elements/front/nazwa_utworu.py +++ b/src/librarian/elements/front/nazwa_utworu.py @@ -6,7 +6,6 @@ from .base import HeaderElement class NazwaUtworu(HeaderElement): TXT_BOTTOM_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 1 HTML_CLASS = 'wl title' diff --git a/src/librarian/elements/front/podtytul.py b/src/librarian/elements/front/podtytul.py index 711b3fa..01991d5 100644 --- a/src/librarian/elements/front/podtytul.py +++ b/src/librarian/elements/front/podtytul.py @@ -6,7 +6,6 @@ from .base import HeaderElement class Podtytul(HeaderElement): TXT_BOTTOM_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 1 HTML_CLASS = 'wl subtitle' diff --git a/src/librarian/elements/headers/naglowek_czesc.py b/src/librarian/elements/headers/naglowek_czesc.py index 11991ae..7d2fa0d 100644 --- a/src/librarian/elements/headers/naglowek_czesc.py +++ b/src/librarian/elements/headers/naglowek_czesc.py @@ -10,8 +10,6 @@ class NaglowekCzesc(WLElement): TXT_TOP_MARGIN = 5 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 5 - TXT_LEGACY_BOTTOM_MARGIN = 0 EPUB_TAG = HTML_TAG = "h2" HTML_CLASS = "wl" diff --git a/src/librarian/elements/headers/naglowek_podrozdzial.py b/src/librarian/elements/headers/naglowek_podrozdzial.py index 213ec00..6865362 100644 --- a/src/librarian/elements/headers/naglowek_podrozdzial.py +++ b/src/librarian/elements/headers/naglowek_podrozdzial.py @@ -10,8 +10,6 @@ class NaglowekPodrozdzial(WLElement): TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 3 - TXT_LEGACY_BOTTOM_MARGIN = 0 HTML_TAG = "h4" diff --git a/src/librarian/elements/headers/naglowek_rozdzial.py b/src/librarian/elements/headers/naglowek_rozdzial.py index 502dea1..e61f6a7 100644 --- a/src/librarian/elements/headers/naglowek_rozdzial.py +++ b/src/librarian/elements/headers/naglowek_rozdzial.py @@ -10,8 +10,6 @@ class NaglowekRozdzial(WLElement): TXT_TOP_MARGIN = 4 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 4 - TXT_LEGACY_BOTTOM_MARGIN = 0 HTML_TAG = 'h3' HTML_CLASS = 'wl' diff --git a/src/librarian/elements/headers/naglowek_scena.py b/src/librarian/elements/headers/naglowek_scena.py index e9ea5c7..a0ae5be 100644 --- a/src/librarian/elements/headers/naglowek_scena.py +++ b/src/librarian/elements/headers/naglowek_scena.py @@ -10,8 +10,6 @@ class NaglowekScena(WLElement): TXT_TOP_MARGIN = 4 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 4 - TXT_LEGACY_BOTTOM_MARGIN = 0 HTML_TAG = 'h3' diff --git a/src/librarian/elements/masters/__init__.py b/src/librarian/elements/masters/__init__.py index d4f3cf0..54f5ba1 100644 --- a/src/librarian/elements/masters/__init__.py +++ b/src/librarian/elements/masters/__init__.py @@ -7,4 +7,4 @@ from ..base import WLElement class Master(WLElement): CAN_HAVE_TEXT = False - TXT_LEGACY_BOTTOM_MARGIN = 2 + TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/paragraphs/akap.py b/src/librarian/elements/paragraphs/akap.py index 63f74b5..6ee10d7 100644 --- a/src/librarian/elements/paragraphs/akap.py +++ b/src/librarian/elements/paragraphs/akap.py @@ -10,8 +10,6 @@ class Akap(WLElement): TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 2 - TXT_LEGACY_BOTTOM_MARGIN = 0 EPUB_CLASS = 'paragraph' diff --git a/src/librarian/elements/poetry/strofa.py b/src/librarian/elements/poetry/strofa.py index bccb01b..f698bc3 100644 --- a/src/librarian/elements/poetry/strofa.py +++ b/src/librarian/elements/poetry/strofa.py @@ -12,8 +12,6 @@ class Strofa(WLElement): TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 - TXT_LEGACY_TOP_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 0 EPUB_TAG = HTML_TAG = 'div' EPUB_CLASS = HTML_CLASS = 'stanza' diff --git a/src/librarian/elements/poetry/wers.py b/src/librarian/elements/poetry/wers.py index 127d5bd..cf2089c 100644 --- a/src/librarian/elements/poetry/wers.py +++ b/src/librarian/elements/poetry/wers.py @@ -9,8 +9,6 @@ class Wers(WLElement): TXT_TOP_MARGIN = 1 TXT_BOTTOM_MARGIN = 1 - TXT_LEGACY_TOP_MARGIN = 1 - TXT_LEGACY_BOTTOM_MARGIN = 0 EPUB_TAG = HTML_TAG = 'div' EPUB_CLASS = 'verse' diff --git a/src/librarian/elements/separators/sekcja_asterysk.py b/src/librarian/elements/separators/sekcja_asterysk.py index 35bd7a7..3ba8270 100644 --- a/src/librarian/elements/separators/sekcja_asterysk.py +++ b/src/librarian/elements/separators/sekcja_asterysk.py @@ -7,8 +7,6 @@ from ..base import WLElement class SekcjaAsterysk(WLElement): TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 4 - TXT_LEGACY_TOP_MARGIN = 2 - TXT_LEGACY_BOTTOM_MARGIN = 2 EPUB_TAG = HTML_TAG = "p" HTML_CLASS = HTML_CLASS = "spacer-asterisk" diff --git a/src/librarian/elements/separators/sekcja_swiatlo.py b/src/librarian/elements/separators/sekcja_swiatlo.py index 805078e..e3ab103 100644 --- a/src/librarian/elements/separators/sekcja_swiatlo.py +++ b/src/librarian/elements/separators/sekcja_swiatlo.py @@ -6,7 +6,6 @@ from ..base import WLElement class SekcjaSwiatlo(WLElement): TXT_BOTTOM_MARGIN = 6 - TXT_LEGACY_BOTTOM_MARGIN = 4 HTML_TAG = "hr" HTML_CLASS = "spacer" diff --git a/src/librarian/elements/separators/separator_linia.py b/src/librarian/elements/separators/separator_linia.py index 8874967..e5a5218 100644 --- a/src/librarian/elements/separators/separator_linia.py +++ b/src/librarian/elements/separators/separator_linia.py @@ -7,8 +7,6 @@ from ..base import WLElement class SeparatorLinia(WLElement): TXT_TOP_MARGIN = 4 TXT_BOTTOM_MARGIN = 4 - TXT_LEGACY_TOP_MARGIN = 2 - TXT_LEGACY_BOTTOM_MARGIN = 2 EPUB_TAG = HTML_TAG = "hr" EPUB_CLASS = HTML_CLASS = "spacer-line" diff --git a/src/librarian/elements/styles/__init__.py b/src/librarian/elements/styles/__init__.py index 4453490..dd35dfc 100644 --- a/src/librarian/elements/styles/__init__.py +++ b/src/librarian/elements/styles/__init__.py @@ -2,7 +2,7 @@ # Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # from .indeks_dolny import IndeksDolny -from .mat import Mat +from .mat import Mat, M, MRow, MFenced, MFrac, MSup from .slowo_obce import SlowoObce from .tytul_dziela import TytulDziela from .wieksze_odstepy import WiekszeOdstepy diff --git a/src/librarian/elements/styles/mat.py b/src/librarian/elements/styles/mat.py index 68fcc3f..1512c32 100644 --- a/src/librarian/elements/styles/mat.py +++ b/src/librarian/elements/styles/mat.py @@ -6,6 +6,8 @@ from ..base import WLElement class Mat(WLElement): + STRIP = True + def html_build(self, builder): e = copy(self) e.tag = 'math' @@ -15,3 +17,31 @@ class Mat(WLElement): def epub_build(self, builder): builder.start_element('img', {"src": builder.mathml(self)}) builder.end_element() + + +class M(WLElement): + STRIP = True + + +class MRow(M): + pass + + +class MFenced(M): + TXT_PREFIX = '(' + TXT_SUFFIX = ')' + + +class MFrac(M): + TXT_PREFIX = '(' + TXT_SUFFIX = ')' + + def txt_after_child(self, builder, child_count): + if child_count: + builder.push_text(') / (') + + +class MSup(M): + def txt_after_child(self, builder, child_count): + if child_count: + builder.push_text(' ^ ') diff --git a/src/librarian/elements/tools/__init__.py b/src/librarian/elements/tools/__init__.py index c4820b1..69a26ea 100644 --- a/src/librarian/elements/tools/__init__.py +++ b/src/librarian/elements/tools/__init__.py @@ -70,3 +70,13 @@ class Tab(WLElement): } get_epub_attr = get_html_attr + + def txt_build(self, builder): + szer = self.get('szer', '1').strip() + if szer.endswith('em'): + szer = szer[:-2] + try: + szer = int(szer) + except: + szer = 1 + builder.push_text(' ' * 4 * szer) diff --git a/tests/files/tags/didaskalia/1.expected.html b/tests/files/tags/didaskalia/1.expected.html index efefbab..88137d5 100644 --- a/tests/files/tags/didaskalia/1.expected.html +++ b/tests/files/tags/didaskalia/1.expected.html @@ -8,7 +8,7 @@ Na toż bym się, mocium panie,
Kawalerstwa dziś w
uderzając w stół
-
By kto... niech go piorun trzaśnie!
+
By kto… niech go piorun trzaśnie!
Długo będzie na to czekał,
po krótkim milczeniu, biorąc talerz
@@ -21,7 +21,7 @@ Na toż bym się, mocium panie,
Kawalerstwa dziś w
Bawi z nami — w domu Klary,
Bo krewniaczka jej daleka,
-10
Ale mnie się wszystko zdaje...
+10
Ale mnie się wszystko zdaje…
diff --git a/tests/files/tags/kwestia/1.expected.html b/tests/files/tags/kwestia/1.expected.html index 2fe9773..54109b5 100644 --- a/tests/files/tags/kwestia/1.expected.html +++ b/tests/files/tags/kwestia/1.expected.html @@ -4,7 +4,7 @@ 1
Więc jako dawniej czynili mocarze,
Z Lechem się mieniał Scyta na obrączki;
A pokochawszy mocniej sercem, w darze
-
Dał mu koronę... stąd nasza korona.
+
Dał mu koronę… stąd nasza korona.
5
Zbawiciel niegdyś wyciągając rączki
Szedł do niej z matki zadumanej łona
I ku rubinom podawał się cały
diff --git a/tests/files/tags/kwestia/2.expected.html b/tests/files/tags/kwestia/2.expected.html index 5783d97..b73287a 100644 --- a/tests/files/tags/kwestia/2.expected.html +++ b/tests/files/tags/kwestia/2.expected.html @@ -1,11 +1,11 @@

GŁOS HESI

-1

Mamuńciu, tak zimno! troszkę ciepłej wody...

+1

Mamuńciu, tak zimno! troszkę ciepłej wody…

DULSKA

-2

Jeszcze czego? Hartujcie się... Felicjan! wstajesz? Wiesz? ten błazen, twój syn, nie wrócił jeszcze do domu! Co? nic nie mówisz? naturalnie. +2

Jeszcze czego? Hartujcie się… Felicjan! wstajesz? Wiesz? ten błazen, twój syn, nie wrócił jeszcze do domu! Co? nic nie mówisz? naturalnie. Ojciec toleruje. Niedaleko padło jabłko od jabłoni. Ale jak będą dłużki małe — nie zapłacę.

diff --git a/tests/files/tags/motto_podpis/1.expected.html b/tests/files/tags/motto_podpis/1.expected.html index 1ff589b..ec6cecb 100644 --- a/tests/files/tags/motto_podpis/1.expected.html +++ b/tests/files/tags/motto_podpis/1.expected.html @@ -5,7 +5,7 @@
1
Dovete adunque sapere come sono/ -due generazioni da combattere...../ +due generazioni da combattere…../ bisogna essere volpe e leone.
diff --git a/tests/files/tags/naglowek_osoba/1.expected.html b/tests/files/tags/naglowek_osoba/1.expected.html index 4903bb3..1bbd782 100644 --- a/tests/files/tags/naglowek_osoba/1.expected.html +++ b/tests/files/tags/naglowek_osoba/1.expected.html @@ -6,7 +6,7 @@ 1
Piękne dobra w każdym względzie —
Lasy — gleba wyśmienita —
Dobrą żoną pewnie będzie —
-
Co za czynsze! — To kobiéta!...
+
Co za czynsze! — To kobiéta!…
5
Trzy folwarki!
diff --git a/tests/files/tags/nota/2.expected.html b/tests/files/tags/nota/2.expected.html index 6006ef6..d78347f 100644 --- a/tests/files/tags/nota/2.expected.html +++ b/tests/files/tags/nota/2.expected.html @@ -12,7 +12,7 @@
I nie przyjmował nigdy, jak wiek wiekiem.
Bo glina w glinę wtapia się bez przerwy,
5
Gdy sprzeczne ciała zbija się aż ćwiekiem
-
Później... lub pierwéj...
+
Później… lub pierwéj…

Pisałem w Paryżu 1856 w styczniu.

diff --git a/tests/files/tags/nota/3.expected.html b/tests/files/tags/nota/3.expected.html index b7d62b4..243db4a 100644 --- a/tests/files/tags/nota/3.expected.html +++ b/tests/files/tags/nota/3.expected.html @@ -1,8 +1,8 @@
1

Jego marzenie się nie spełniło: nie ożenił się, choć był już na to zupełnie zdecydowany, -gdy skończono oporządzać jego mieszkanie. [...] Rzecz dziwna, czym jest mundur dla takich ludzi.

+gdy skończono oporządzać jego mieszkanie. […] Rzecz dziwna, czym jest mundur dla takich ludzi.

Uwaga tłumacza. Drukowane obecnie w „Nowej Reformie” Wspomnienia Sybiraka (pamiętniki Józefa Bogusławskiego) pozwalają szczęśliwym trafem podać w całości nazwiska Polaków, o których mówi Dostojewski w swoich pamiętnikach, -oznaczając te nazwiska tylko pierwszymi literami, z dodaniem czasami końcówek. [...]

+oznaczając te nazwiska tylko pierwszymi literami, z dodaniem czasami końcówek. […]

diff --git a/tests/files/tags/poezja_cyt/1.expected.html b/tests/files/tags/poezja_cyt/1.expected.html index b0d195b..17e1d92 100644 --- a/tests/files/tags/poezja_cyt/1.expected.html +++ b/tests/files/tags/poezja_cyt/1.expected.html @@ -1,4 +1,4 @@
1
Tymczasem przenoś duszę moją utęsknioną
-
Do tych pagórków leśnych, do tych łąk zielonych...
+
Do tych pagórków leśnych, do tych łąk zielonych…
diff --git a/tests/files/tags/slowo_obce/1.expected.html b/tests/files/tags/slowo_obce/1.expected.html index 8fa6808..5c8ce78 100644 --- a/tests/files/tags/slowo_obce/1.expected.html +++ b/tests/files/tags/slowo_obce/1.expected.html @@ -1,3 +1,3 @@
-1

Na czwarty dzień przywiózł mu stójka z osłowickiej apteki diachylum; Zołzikiewicz rozsmarował na płatek...

+1

Na czwarty dzień przywiózł mu stójka z osłowickiej apteki diachylum; Zołzikiewicz rozsmarował na płatek…

diff --git a/tests/files/tags/tabela/1.expected.html b/tests/files/tags/tabela/1.expected.html index fe9ef5f..f839494 100644 --- a/tests/files/tags/tabela/1.expected.html +++ b/tests/files/tags/tabela/1.expected.html @@ -1,22 +1,22 @@
- - - - - - - - + + + + + + + +
ab
cd
ab
cd
- - - - - - - - + + + + + + + +
ab
cd
ab
cd
diff --git a/tests/files/tags/tytul_dziela/1.expected.html b/tests/files/tags/tytul_dziela/1.expected.html index 364942d..f246117 100644 --- a/tests/files/tags/tytul_dziela/1.expected.html +++ b/tests/files/tags/tytul_dziela/1.expected.html @@ -4,5 +4,5 @@ Kilka uwag o „Hamlecie” Szekspira - pióra...

+ pióra…

diff --git a/tests/files/tags/wers_wciety/1.expected.html b/tests/files/tags/wers_wciety/1.expected.html index 0ee7c74..b2254fb 100644 --- a/tests/files/tags/wers_wciety/1.expected.html +++ b/tests/files/tags/wers_wciety/1.expected.html @@ -1,6 +1,6 @@

-Julisz SłowackiAnioł ognisty — mój anioł lewy... +Julisz SłowackiAnioł ognisty — mój anioł lewy…

1
Anioł ognisty — mój anioł lewy
diff --git a/tests/files/tags/wers_wciety/2.expected.html b/tests/files/tags/wers_wciety/2.expected.html index 85b8e22..f502985 100644 --- a/tests/files/tags/wers_wciety/2.expected.html +++ b/tests/files/tags/wers_wciety/2.expected.html @@ -1,8 +1,10 @@ -
1
zwykły
-
wcięty
-
wcięty 1
-
wcięty 2
- 5
wcięty 25
-
akapitowy
-
środek
-
do prawej
+
+1
zwykły
+
wcięty
+
wcięty 1
+
wcięty 2
+5
wcięty 25
+
akapitowy
+
środek
+
do prawej
+
diff --git a/tests/files/tags/wers_wciety/2.xml b/tests/files/tags/wers_wciety/2.xml index c31d2cb..9ce9f6a 100644 --- a/tests/files/tags/wers_wciety/2.xml +++ b/tests/files/tags/wers_wciety/2.xml @@ -1,10 +1,10 @@ - zwykły - wcięty - wcięty 1 - wcięty 2 - wcięty 25 - akapitowy - środek + zwykły/ + wcięty/ + wcięty 1/ + wcięty 2/ + wcięty 25/ + akapitowy/ + środek/ do prawej diff --git a/tests/files/tags/wyroznienie/1.expected.html b/tests/files/tags/wyroznienie/1.expected.html index 65cffe3..d160084 100644 --- a/tests/files/tags/wyroznienie/1.expected.html +++ b/tests/files/tags/wyroznienie/1.expected.html @@ -5,5 +5,5 @@
Blade jak świt,
5
— Gdy życia koniec szepce do początku:
-„Nie stargam cię ja — nie! — Ja, u-wydatnię!...”
+„Nie stargam cię ja — nie! — Ja, u-wydatnię!…”
diff --git a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt index cac61d8..29e243a 100644 --- a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt +++ b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt @@ -1,5 +1,3 @@ - - Między nami nic nie było! Żadnych zwierzeń, wyznań żadnych! Nic nas z sobą nie łączyło — diff --git a/tests/test_text.py b/tests/test_text.py index 7797530..9109413 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -2,9 +2,7 @@ # Copyright © Fundacja Wolne Lektury. See NOTICE for more information. # import unittest -from librarian import NoDublinCore from librarian.builders import builders -from librarian.parser import WLDocument as LegacyWLDocument from librarian.document import WLDocument from .utils import get_fixture @@ -12,16 +10,6 @@ from .utils import get_fixture class TextTests(unittest.TestCase): maxDiff = None - def test_transform_legacy(self): - expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt') - - text = LegacyWLDocument.from_file( - get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') - ).as_text().get_bytes().decode('utf-8') - - with open(expected_output_file_path, 'rb') as f: - self.assertEqual(text, f.read().decode('utf-8')) - def test_transform(self): expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt') @@ -36,22 +24,9 @@ class TextTests(unittest.TestCase): def test_transform_raw(self): expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt') - text = LegacyWLDocument.from_file( - get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') - ).as_text(flags=['raw-text']).get_bytes().decode('utf-8') + text = WLDocument( + filename=get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml') + ).build(builders['txt'], raw_text=True).get_bytes().decode('utf-8') with open(expected_output_file_path, 'rb') as f: self.assertEqual(text, f.read().decode('utf-8')) - - def test_no_dublincore(self): - with self.assertRaises(NoDublinCore): - LegacyWLDocument.from_file( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml') - ).as_text() - - def test_passing_parse_dublincore_to_transform(self): - """Passing parse_dublincore=False to the constructor omits DublinCore parsing.""" - LegacyWLDocument.from_file( - get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'), - parse_dublincore=False, - ).as_text() -- 2.20.1