From: Radek Czajka
Date: Mon, 16 Sep 2024 10:33:02 +0000 (+0200)
Subject: Text converter updated.
X-Git-Tag: 24.9~4
X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/55fbc767e5592cad565cff0d16b9638d01b6706f?ds=inline
Text converter updated.
---
diff --git a/setup.py b/setup.py
index 184db52..2af0a05 100755
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ setup(
"librarian=librarian.command_line:main"
]
},
- scripts=['scripts/book2html',
+ scripts=[
'scripts/book2txt',
'scripts/book2pdf',
'scripts/book2fb2',
diff --git a/src/librarian/builders/txt.py b/src/librarian/builders/txt.py
index 8302de5..28fe709 100644
--- a/src/librarian/builders/txt.py
+++ b/src/librarian/builders/txt.py
@@ -15,14 +15,6 @@ class TxtFragment:
self.current_margin = 0
self.starting_block = True
- def push_legacy_margin(self, margin):
- if margin:
- if self.pieces:
- self.pieces[-1] = self.pieces[-1].rstrip(' ')
- self.pieces.append('\r\n' * margin)
- self.current_margin += margin
- self.starting_block = True
-
def push_margin(self, margin):
if margin:
if self.pieces:
@@ -47,8 +39,11 @@ class TxtBuilder:
"""
file_extension = "txt"
identifier = "txt"
+ after_child_fn = 'txt_after_child'
+ debug = False
orphans = False
+ normalize_whitespace = True
default_license_description = {
"pol": (
@@ -107,9 +102,6 @@ class TxtBuilder:
def push_margin(self, margin):
self.current_fragments[-1].push_margin(margin)
- def push_legacy_margin(self, margin, where=None):
- self.current_fragments[-1].push_legacy_margin(margin)
-
def build(self, document, raw_text=False, **kwargs):
document.tree.getroot().txt_build(self)
meta = document.meta
@@ -123,27 +115,26 @@ class TxtBuilder:
for translator in meta.translators
)
)
- #builder.push_margin(2)
- self.push_legacy_margin(1)
+ self.push_margin(2)
if meta.isbn_txt:
- #builder.push_margin(2)
- self.push_legacy_margin(1)
+ self.push_margin(2)
isbn = meta.isbn_txt
if isbn.startswith(('ISBN-' , 'ISBN ')):
isbn = isbn[5:]
self.push_text('ISBN {isbn}'.format(isbn=isbn))
#builder.push_margin(5)
- #builder.push_margin(4)
- self.push_legacy_margin(1)
+ self.push_margin(4)
self.exit_fragment()
- text = ''.join(self.fragments['header'].pieces) + ''.join(self.fragments[None].pieces)
+ text = ''.join(self.fragments[None].pieces).lstrip()
if raw_text:
result = text
else:
+ text = ''.join(self.fragments['header'].pieces) + text
+
if meta.license:
license_description = self.license_description['pol'].format(meta=meta)
else:
diff --git a/src/librarian/elements/__init__.py b/src/librarian/elements/__init__.py
index b08c3e1..0191ae5 100644
--- a/src/librarian/elements/__init__.py
+++ b/src/librarian/elements/__init__.py
@@ -132,11 +132,11 @@ WL_ELEMENTS = {
"wywiad_odp": blocks.WywiadOdp,
# Inline MathML, should really be namespaced.
- "mrow": etree.ElementBase,
- "mi": etree.ElementBase,
- "mo": etree.ElementBase,
- "msup": etree.ElementBase,
- "mn": etree.ElementBase,
- "mfrac": etree.ElementBase,
- "mfenced": etree.ElementBase,
+ "mrow": styles.MRow,
+ "mi": styles.M,
+ "mo": styles.M,
+ "msup": styles.MSup,
+ "mn": styles.M,
+ "mfrac": styles.MFrac,
+ "mfenced": styles.MFenced,
}
diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py
index 2349f16..060449c 100644
--- a/src/librarian/elements/base.py
+++ b/src/librarian/elements/base.py
@@ -45,12 +45,11 @@ class WLElement(etree.ElementBase):
text_substitutions = [
('---', '—'),
('--', '–'),
- #('...', '…'), # Temporary turnoff for epub
+ ('...', '…'),
(',,', '„'),
('"', '”'),
('\ufeff', ''),
-
- ("'", "\u2019"), # This was enabled for epub.
+ ("'", "\u2019"),
]
@property
@@ -118,9 +117,9 @@ class WLElement(etree.ElementBase):
text = text or ''
for e, s in self.text_substitutions:
text = text.replace(e, s)
- # FIXME: TEmporary turnoff
-# text = re.sub(r'\s+', ' ', text)
-### TODO: Added now for epub
+
+ if getattr(builder, 'normalize_whitespace', False):
+ text = re.sub(r'\s+', ' ', text)
if getattr(builder, 'hyphenator', None) is not None:
newt = ''
@@ -144,8 +143,12 @@ class WLElement(etree.ElementBase):
text = text.rstrip()
builder.push_text(text)
+ real_child_count = 0
for i, child in enumerate(self):
if isinstance(child, WLElement):
getattr(child, build_method)(builder)
+ self.after_child(builder, real_child_count)
+ real_child_count += 1
+
# FIXME base builder api
elif getattr(builder, 'debug', False) and child.tag is etree.Comment:
builder.process_comment(child)
@@ -155,21 +158,23 @@ class WLElement(etree.ElementBase):
text = text.rstrip()
builder.push_text(text)
+ def after_child(self, builder, child_count):
+ fn = getattr(builder, 'after_child_fn', None)
+ if fn:
+ getattr(self, builder.after_child_fn)(builder, child_count)
+
+ def txt_after_child(self, builder, child_count):
+ pass
+
def _txt_build_inner(self, builder):
self._build_inner(builder, 'txt_build')
def txt_build(self, builder):
- if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
- builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)
- else:
- builder.push_margin(self.TXT_TOP_MARGIN)
+ builder.push_margin(self.TXT_TOP_MARGIN)
builder.push_text(self.TXT_PREFIX, True)
self._txt_build_inner(builder)
builder.push_text(self.TXT_SUFFIX, True)
- if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
- builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)
- else:
- builder.push_margin(self.TXT_BOTTOM_MARGIN)
+ builder.push_margin(self.TXT_BOTTOM_MARGIN)
def _html_build_inner(self, builder):
self._build_inner(builder, 'html_build')
diff --git a/src/librarian/elements/blocks/dedykacja.py b/src/librarian/elements/blocks/dedykacja.py
index 265b268..ae00b72 100644
--- a/src/librarian/elements/blocks/dedykacja.py
+++ b/src/librarian/elements/blocks/dedykacja.py
@@ -7,7 +7,8 @@ from ..base import WLElement
class Dedykacja(WLElement):
NUMBERING = 'i'
- TXT_LEGACY_TOP_MARGIN = 2
+ TXT_TOP_MARGIN = 2
+ TXT_BOTTOM_MARGIN = 4
EPUB_TAG = HTML_TAG = "div"
EPUB_CLASS = HTML_CLASS = "dedication"
diff --git a/src/librarian/elements/blocks/dlugi_cytat.py b/src/librarian/elements/blocks/dlugi_cytat.py
index 0a55833..bdac225 100644
--- a/src/librarian/elements/blocks/dlugi_cytat.py
+++ b/src/librarian/elements/blocks/dlugi_cytat.py
@@ -8,9 +8,7 @@ class DlugiCytat(WLElement):
CAN_HAVE_TEXT = False
TXT_TOP_MARGIN = 3
- TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 0
+ TXT_BOTTOM_MARGIN = 3
HTML_TAG = 'blockquote'
diff --git a/src/librarian/elements/blocks/poezja_cyt.py b/src/librarian/elements/blocks/poezja_cyt.py
index 2b351ca..dd26d13 100644
--- a/src/librarian/elements/blocks/poezja_cyt.py
+++ b/src/librarian/elements/blocks/poezja_cyt.py
@@ -9,8 +9,6 @@ class PoezjaCyt(WLElement):
TXT_TOP_MARGIN = 3
TXT_BOTTOM_MARGIN = 3
- TXT_LEGACY_TOP_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 0
HTML_TAG = 'blockquote'
diff --git a/src/librarian/elements/drama/didaskalia.py b/src/librarian/elements/drama/didaskalia.py
index 8f461dd..b6b53f9 100644
--- a/src/librarian/elements/drama/didaskalia.py
+++ b/src/librarian/elements/drama/didaskalia.py
@@ -9,8 +9,6 @@ class Didaskalia(WLElement):
- TXT_TOP_PARGIN = 2
+ TXT_TOP_MARGIN = 2
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 2
- TXT_LEGACY_BOTTOM_MARGIN = 0
TXT_PREFIX = "/ "
TXT_SUFFIX = " /"
diff --git a/src/librarian/elements/drama/lista_osob.py b/src/librarian/elements/drama/lista_osob.py
index f9e210d..d8ea32a 100644
--- a/src/librarian/elements/drama/lista_osob.py
+++ b/src/librarian/elements/drama/lista_osob.py
@@ -11,8 +11,6 @@ class ListaOsob(WLElement):
TXT_TOP_MARGIN = 3
TXT_BOTTOM_MARGIN = 3
- TXT_LEGACY_TOP_MARGIN = 3
- TXT_LEGACY_BOTTOM_MARGIN = 1
HTML_TAG = "div"
HTML_CLASS = "person-list"
diff --git a/src/librarian/elements/drama/lista_osoba.py b/src/librarian/elements/drama/lista_osoba.py
index 2341918..7319321 100644
--- a/src/librarian/elements/drama/lista_osoba.py
+++ b/src/librarian/elements/drama/lista_osoba.py
@@ -9,8 +9,6 @@ class ListaOsoba(WLElement):
TXT_TOP_MARGIN = 1
TXT_BOTTOM_MARGIN = 1
- TXT_LEGACY_TOP_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 0
TXT_PREFIX = " * "
EPUB_TAG = HTML_TAG = "li"
diff --git a/src/librarian/elements/drama/naglowek_osoba.py b/src/librarian/elements/drama/naglowek_osoba.py
index 43aa1d3..6ff6761 100644
--- a/src/librarian/elements/drama/naglowek_osoba.py
+++ b/src/librarian/elements/drama/naglowek_osoba.py
@@ -9,8 +9,6 @@ class NaglowekOsoba(WLElement):
TXT_TOP_MARGIN = 3
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 3
- TXT_LEGACY_BOTTOM_MARGIN = 0
HTML_TAG = "h4"
diff --git a/src/librarian/elements/figures/kol.py b/src/librarian/elements/figures/kol.py
index 9b425de..fa09fc7 100644
--- a/src/librarian/elements/figures/kol.py
+++ b/src/librarian/elements/figures/kol.py
@@ -6,3 +6,4 @@ from ..base import WLElement
class Kol(WLElement):
EPUB_TAG = HTML_TAG = 'td'
+ TXT_PREFIX = ' ' * 4
diff --git a/src/librarian/elements/figures/tabela.py b/src/librarian/elements/figures/tabela.py
index 783fcb6..5ab04e0 100644
--- a/src/librarian/elements/figures/tabela.py
+++ b/src/librarian/elements/figures/tabela.py
@@ -6,6 +6,10 @@ from ..base import WLElement
class Tabela(WLElement):
NUMBERING = 'i'
+ CAN_HAVE_TEXT = False
+
+ TXT_TOP_MARGIN = 3
+ TXT_BOTTOM_MARGIN = 3
EPUB_TAG = HTML_TAG = 'table'
diff --git a/src/librarian/elements/figures/wiersz.py b/src/librarian/elements/figures/wiersz.py
index c5d355a..c013558 100644
--- a/src/librarian/elements/figures/wiersz.py
+++ b/src/librarian/elements/figures/wiersz.py
@@ -5,4 +5,7 @@ from ..base import WLElement
class Wiersz(WLElement):
+ CAN_HAVE_TEXT = False
EPUB_TAG = HTML_TAG = 'tr'
+ TXT_TOP_MARGIN = 1
+ TXT_BOTTOM_MARGIN = 1
diff --git a/src/librarian/elements/front/autor_utworu.py b/src/librarian/elements/front/autor_utworu.py
index 3e35b0b..0f71cb5 100644
--- a/src/librarian/elements/front/autor_utworu.py
+++ b/src/librarian/elements/front/autor_utworu.py
@@ -6,7 +6,6 @@ from .base import HeaderElement
class AutorUtworu(HeaderElement):
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_BOTTOM_MARGIN = 2
HTML_CLASS = 'wl author'
diff --git a/src/librarian/elements/front/dzielo_nadrzedne.py b/src/librarian/elements/front/dzielo_nadrzedne.py
index cc70164..cc16a81 100644
--- a/src/librarian/elements/front/dzielo_nadrzedne.py
+++ b/src/librarian/elements/front/dzielo_nadrzedne.py
@@ -6,7 +6,6 @@ from .base import HeaderElement
class DzieloNadrzedne(HeaderElement):
TXT_BOTTOM_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 1
HTML_CLASS = "wl collection"
diff --git a/src/librarian/elements/front/motto.py b/src/librarian/elements/front/motto.py
index a6769b8..ac9f209 100644
--- a/src/librarian/elements/front/motto.py
+++ b/src/librarian/elements/front/motto.py
@@ -7,8 +7,8 @@ from ..base import WLElement
class Motto(WLElement):
NUMBERING = 'i'
- TXT_LEGACY_TOP_MARGIN = 4
- TXT_LEGACY_BOTTOM_MARGIN = 2
+ TXT_TOP_MARGIN = 4
+ TXT_BOTTOM_MARGIN = 2
EPUB_TAG = HTML_TAG = "div"
EPUB_CLASS = HTML_CLASS = "motto"
diff --git a/src/librarian/elements/front/nazwa_utworu.py b/src/librarian/elements/front/nazwa_utworu.py
index dd40266..9e3bd51 100644
--- a/src/librarian/elements/front/nazwa_utworu.py
+++ b/src/librarian/elements/front/nazwa_utworu.py
@@ -6,7 +6,6 @@ from .base import HeaderElement
class NazwaUtworu(HeaderElement):
TXT_BOTTOM_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 1
HTML_CLASS = 'wl title'
diff --git a/src/librarian/elements/front/podtytul.py b/src/librarian/elements/front/podtytul.py
index 711b3fa..01991d5 100644
--- a/src/librarian/elements/front/podtytul.py
+++ b/src/librarian/elements/front/podtytul.py
@@ -6,7 +6,6 @@ from .base import HeaderElement
class Podtytul(HeaderElement):
TXT_BOTTOM_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 1
HTML_CLASS = 'wl subtitle'
diff --git a/src/librarian/elements/headers/naglowek_czesc.py b/src/librarian/elements/headers/naglowek_czesc.py
index 11991ae..7d2fa0d 100644
--- a/src/librarian/elements/headers/naglowek_czesc.py
+++ b/src/librarian/elements/headers/naglowek_czesc.py
@@ -10,8 +10,6 @@ class NaglowekCzesc(WLElement):
TXT_TOP_MARGIN = 5
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 5
- TXT_LEGACY_BOTTOM_MARGIN = 0
EPUB_TAG = HTML_TAG = "h2"
HTML_CLASS = "wl"
diff --git a/src/librarian/elements/headers/naglowek_podrozdzial.py b/src/librarian/elements/headers/naglowek_podrozdzial.py
index 213ec00..6865362 100644
--- a/src/librarian/elements/headers/naglowek_podrozdzial.py
+++ b/src/librarian/elements/headers/naglowek_podrozdzial.py
@@ -10,8 +10,6 @@ class NaglowekPodrozdzial(WLElement):
TXT_TOP_MARGIN = 3
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 3
- TXT_LEGACY_BOTTOM_MARGIN = 0
HTML_TAG = "h4"
diff --git a/src/librarian/elements/headers/naglowek_rozdzial.py b/src/librarian/elements/headers/naglowek_rozdzial.py
index 502dea1..e61f6a7 100644
--- a/src/librarian/elements/headers/naglowek_rozdzial.py
+++ b/src/librarian/elements/headers/naglowek_rozdzial.py
@@ -10,8 +10,6 @@ class NaglowekRozdzial(WLElement):
TXT_TOP_MARGIN = 4
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 4
- TXT_LEGACY_BOTTOM_MARGIN = 0
HTML_TAG = 'h3'
HTML_CLASS = 'wl'
diff --git a/src/librarian/elements/headers/naglowek_scena.py b/src/librarian/elements/headers/naglowek_scena.py
index e9ea5c7..a0ae5be 100644
--- a/src/librarian/elements/headers/naglowek_scena.py
+++ b/src/librarian/elements/headers/naglowek_scena.py
@@ -10,8 +10,6 @@ class NaglowekScena(WLElement):
TXT_TOP_MARGIN = 4
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 4
- TXT_LEGACY_BOTTOM_MARGIN = 0
HTML_TAG = 'h3'
diff --git a/src/librarian/elements/masters/__init__.py b/src/librarian/elements/masters/__init__.py
index d4f3cf0..54f5ba1 100644
--- a/src/librarian/elements/masters/__init__.py
+++ b/src/librarian/elements/masters/__init__.py
@@ -7,4 +7,4 @@ from ..base import WLElement
class Master(WLElement):
CAN_HAVE_TEXT = False
- TXT_LEGACY_BOTTOM_MARGIN = 2
+ TXT_BOTTOM_MARGIN = 2
diff --git a/src/librarian/elements/paragraphs/akap.py b/src/librarian/elements/paragraphs/akap.py
index 63f74b5..6ee10d7 100644
--- a/src/librarian/elements/paragraphs/akap.py
+++ b/src/librarian/elements/paragraphs/akap.py
@@ -10,8 +10,6 @@ class Akap(WLElement):
TXT_TOP_MARGIN = 2
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 2
- TXT_LEGACY_BOTTOM_MARGIN = 0
EPUB_CLASS = 'paragraph'
diff --git a/src/librarian/elements/poetry/strofa.py b/src/librarian/elements/poetry/strofa.py
index bccb01b..f698bc3 100644
--- a/src/librarian/elements/poetry/strofa.py
+++ b/src/librarian/elements/poetry/strofa.py
@@ -12,8 +12,6 @@ class Strofa(WLElement):
TXT_TOP_MARGIN = 2
TXT_BOTTOM_MARGIN = 2
- TXT_LEGACY_TOP_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 0
EPUB_TAG = HTML_TAG = 'div'
EPUB_CLASS = HTML_CLASS = 'stanza'
diff --git a/src/librarian/elements/poetry/wers.py b/src/librarian/elements/poetry/wers.py
index 127d5bd..cf2089c 100644
--- a/src/librarian/elements/poetry/wers.py
+++ b/src/librarian/elements/poetry/wers.py
@@ -9,8 +9,6 @@ class Wers(WLElement):
TXT_TOP_MARGIN = 1
TXT_BOTTOM_MARGIN = 1
- TXT_LEGACY_TOP_MARGIN = 1
- TXT_LEGACY_BOTTOM_MARGIN = 0
EPUB_TAG = HTML_TAG = 'div'
EPUB_CLASS = 'verse'
diff --git a/src/librarian/elements/separators/sekcja_asterysk.py b/src/librarian/elements/separators/sekcja_asterysk.py
index 35bd7a7..3ba8270 100644
--- a/src/librarian/elements/separators/sekcja_asterysk.py
+++ b/src/librarian/elements/separators/sekcja_asterysk.py
@@ -7,8 +7,6 @@ from ..base import WLElement
class SekcjaAsterysk(WLElement):
TXT_TOP_MARGIN = 2
TXT_BOTTOM_MARGIN = 4
- TXT_LEGACY_TOP_MARGIN = 2
- TXT_LEGACY_BOTTOM_MARGIN = 2
EPUB_TAG = HTML_TAG = "p"
HTML_CLASS = HTML_CLASS = "spacer-asterisk"
diff --git a/src/librarian/elements/separators/sekcja_swiatlo.py b/src/librarian/elements/separators/sekcja_swiatlo.py
index 805078e..e3ab103 100644
--- a/src/librarian/elements/separators/sekcja_swiatlo.py
+++ b/src/librarian/elements/separators/sekcja_swiatlo.py
@@ -6,7 +6,6 @@ from ..base import WLElement
class SekcjaSwiatlo(WLElement):
TXT_BOTTOM_MARGIN = 6
- TXT_LEGACY_BOTTOM_MARGIN = 4
HTML_TAG = "hr"
HTML_CLASS = "spacer"
diff --git a/src/librarian/elements/separators/separator_linia.py b/src/librarian/elements/separators/separator_linia.py
index 8874967..e5a5218 100644
--- a/src/librarian/elements/separators/separator_linia.py
+++ b/src/librarian/elements/separators/separator_linia.py
@@ -7,8 +7,6 @@ from ..base import WLElement
class SeparatorLinia(WLElement):
TXT_TOP_MARGIN = 4
TXT_BOTTOM_MARGIN = 4
- TXT_LEGACY_TOP_MARGIN = 2
- TXT_LEGACY_BOTTOM_MARGIN = 2
EPUB_TAG = HTML_TAG = "hr"
EPUB_CLASS = HTML_CLASS = "spacer-line"
diff --git a/src/librarian/elements/styles/__init__.py b/src/librarian/elements/styles/__init__.py
index 4453490..dd35dfc 100644
--- a/src/librarian/elements/styles/__init__.py
+++ b/src/librarian/elements/styles/__init__.py
@@ -2,7 +2,7 @@
# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
#
from .indeks_dolny import IndeksDolny
-from .mat import Mat
+from .mat import Mat, M, MRow, MFenced, MFrac, MSup
from .slowo_obce import SlowoObce
from .tytul_dziela import TytulDziela
from .wieksze_odstepy import WiekszeOdstepy
diff --git a/src/librarian/elements/styles/mat.py b/src/librarian/elements/styles/mat.py
index 68fcc3f..1512c32 100644
--- a/src/librarian/elements/styles/mat.py
+++ b/src/librarian/elements/styles/mat.py
@@ -6,6 +6,8 @@ from ..base import WLElement
class Mat(WLElement):
+ STRIP = True
+
def html_build(self, builder):
e = copy(self)
e.tag = 'math'
@@ -15,3 +17,31 @@ class Mat(WLElement):
def epub_build(self, builder):
builder.start_element('img', {"src": builder.mathml(self)})
builder.end_element()
+
+
+class M(WLElement):
+ STRIP = True
+
+
+class MRow(M):
+ pass
+
+
+class MFenced(M):
+ TXT_PREFIX = '('
+ TXT_SUFFIX = ')'
+
+
+class MFrac(M):
+ TXT_PREFIX = '('
+ TXT_SUFFIX = ')'
+
+ def txt_after_child(self, builder, child_count):
+ if child_count:
+ builder.push_text(') / (')
+
+
+class MSup(M):
+ def txt_after_child(self, builder, child_count):
+ if child_count:
+ builder.push_text(' ^ ')
diff --git a/src/librarian/elements/tools/__init__.py b/src/librarian/elements/tools/__init__.py
index c4820b1..69a26ea 100644
--- a/src/librarian/elements/tools/__init__.py
+++ b/src/librarian/elements/tools/__init__.py
@@ -70,3 +70,13 @@ class Tab(WLElement):
}
get_epub_attr = get_html_attr
+
+ def txt_build(self, builder):
+ szer = self.get('szer', '1').strip()
+ if szer.endswith('em'):
+ szer = szer[:-2]
+ try:
+ szer = int(szer)
+ except ValueError:
+ szer = 1
+ builder.push_text(' ' * 4 * szer)
diff --git a/tests/files/tags/didaskalia/1.expected.html b/tests/files/tags/didaskalia/1.expected.html
index efefbab..88137d5 100644
--- a/tests/files/tags/didaskalia/1.expected.html
+++ b/tests/files/tags/didaskalia/1.expected.html
@@ -8,7 +8,7 @@ Na toż bym się, mocium panie,Kawalerstwa dziś w
uderzając w stół
-
By kto... niech go piorun trzaśnie!
+
By kto… niech go piorun trzaśnie!
Długo będzie na to czekał,
po krótkim milczeniu, biorąc talerz
@@ -21,7 +21,7 @@ Na toż bym się, mocium panie,Kawalerstwa dziś w
Bawi z nami — w domu Klary,
Bo krewniaczka jej daleka,
-
10Ale mnie się wszystko zdaje...
+
10Ale mnie się wszystko zdaje…
diff --git a/tests/files/tags/kwestia/1.expected.html b/tests/files/tags/kwestia/1.expected.html
index 2fe9773..54109b5 100644
--- a/tests/files/tags/kwestia/1.expected.html
+++ b/tests/files/tags/kwestia/1.expected.html
@@ -4,7 +4,7 @@
1Więc jako dawniej czynili mocarze,
Z Lechem się mieniał Scyta na obrączki;
A pokochawszy mocniej sercem, w darze
-Dał mu koronę... stąd nasza korona.
+Dał mu koronę… stąd nasza korona.
5Zbawiciel niegdyś wyciągając rączki
Szedł do niej z matki zadumanej łona
I ku rubinom podawał się cały
diff --git a/tests/files/tags/kwestia/2.expected.html b/tests/files/tags/kwestia/2.expected.html
index 5783d97..b73287a 100644
--- a/tests/files/tags/kwestia/2.expected.html
+++ b/tests/files/tags/kwestia/2.expected.html
@@ -1,11 +1,11 @@
GŁOS HESI
-
1Mamuściu, tak zimno! troszkę ciepłej wody...
+
1Mamuściu, tak zimno! troszkę ciepłej wody…
DULSKA
-
2Jeszcze czego? Hartujcie się... Felicjan! wstajesz? Wiesz? ten błazen, twój syn, nie wrócił jeszcze do domu! Co? nic nie mówisz? naturalnie.
+2
Jeszcze czego? Hartujcie się… Felicjan! wstajesz? Wiesz? ten błazen, twój syn, nie wrócił jeszcze do domu! Co? nic nie mówisz? naturalnie.
Ojciec toleruje. Niedaleko padło jabłko od jabłoni. Ale jak będą dłużki małe — nie zapłacę.
diff --git a/tests/files/tags/motto_podpis/1.expected.html b/tests/files/tags/motto_podpis/1.expected.html
index 1ff589b..ec6cecb 100644
--- a/tests/files/tags/motto_podpis/1.expected.html
+++ b/tests/files/tags/motto_podpis/1.expected.html
@@ -5,7 +5,7 @@
1Dovete adunque sapere come sono/
-due generazioni da combattere...../
+due generazioni da combattere…../
bisogna essere volpe e leone.
diff --git a/tests/files/tags/naglowek_osoba/1.expected.html b/tests/files/tags/naglowek_osoba/1.expected.html
index 4903bb3..1bbd782 100644
--- a/tests/files/tags/naglowek_osoba/1.expected.html
+++ b/tests/files/tags/naglowek_osoba/1.expected.html
@@ -6,7 +6,7 @@
1Piękne dobra w każdym względzie —
Lasy — gleba wyśmienita —
Dobrą żoną pewnie będzie —
-Co za czynsze! — To kobiéta!...
+Co za czynsze! — To kobiéta!…
5Trzy folwarki!
diff --git a/tests/files/tags/nota/2.expected.html b/tests/files/tags/nota/2.expected.html
index 6006ef6..d78347f 100644
--- a/tests/files/tags/nota/2.expected.html
+++ b/tests/files/tags/nota/2.expected.html
@@ -12,7 +12,7 @@
I nie przyjmował nigdy, jak wiek wiekiem.
Bo glina w glinę wtapia się bez przerwy,
5Gdy sprzeczne ciała zbija się aż ćwiekiem
-Później... lub pierwéj...
+Później… lub pierwéj…
Pisałem w Paryżu 1856 w styczniu.
diff --git a/tests/files/tags/nota/3.expected.html b/tests/files/tags/nota/3.expected.html
index b7d62b4..243db4a 100644
--- a/tests/files/tags/nota/3.expected.html
+++ b/tests/files/tags/nota/3.expected.html
@@ -1,8 +1,8 @@
1Jego marzenie się nie spełniło: nie ożenił się, choć był już na to zupełnie zdecydowany,
-gdy skończono oporządzać jego mieszkanie. [...] Rzecz dziwna, czym jest mundur dla takich ludzi.
+gdy skończono oporządzać jego mieszkanie. […] Rzecz dziwna, czym jest mundur dla takich ludzi.
Uwaga tłumacza. Drukowane obecnie w „Nowej Reformie”
Wspomnienia Sybiraka (pamiętniki Józefa Bogusławskiego) pozwalają szczęśliwym trafem podać w całości nazwiska Polaków, o których mówi Dostojewski w swoich pamiętnikach,
-oznaczając te nazwiska tylko pierwszymi literami, z dodaniem czasami końcówek. [...]
+oznaczając te nazwiska tylko pierwszymi literami, z dodaniem czasami końcówek. […]
diff --git a/tests/files/tags/poezja_cyt/1.expected.html b/tests/files/tags/poezja_cyt/1.expected.html
index b0d195b..17e1d92 100644
--- a/tests/files/tags/poezja_cyt/1.expected.html
+++ b/tests/files/tags/poezja_cyt/1.expected.html
@@ -1,4 +1,4 @@
1Tymczasem przenoś duszę moją utęsknioną
-
Do tych pagórków leśnych, do tych łąk zielonych...
+
Do tych pagórków leśnych, do tych łąk zielonych…
diff --git a/tests/files/tags/slowo_obce/1.expected.html b/tests/files/tags/slowo_obce/1.expected.html
index 8fa6808..5c8ce78 100644
--- a/tests/files/tags/slowo_obce/1.expected.html
+++ b/tests/files/tags/slowo_obce/1.expected.html
@@ -1,3 +1,3 @@
-
1Na czwarty dzień przywiózł mu stójka z osłowickiej apteki diachylum; Zołzikiewicz rozsmarował na płatek...
+
1Na czwarty dzień przywiózł mu stójka z osłowickiej apteki diachylum; Zołzikiewicz rozsmarował na płatek…
diff --git a/tests/files/tags/tabela/1.expected.html b/tests/files/tags/tabela/1.expected.html
index fe9ef5f..f839494 100644
--- a/tests/files/tags/tabela/1.expected.html
+++ b/tests/files/tags/tabela/1.expected.html
@@ -1,22 +1,22 @@
-
- a |
- b |
-
-
- c |
- d |
-
+
+a |
+b |
+
+
+c |
+d |
+
-
- a |
- b |
-
-
- c |
- d |
-
+
+a |
+b |
+
+
+c |
+d |
+
diff --git a/tests/files/tags/tytul_dziela/1.expected.html b/tests/files/tags/tytul_dziela/1.expected.html
index 364942d..f246117 100644
--- a/tests/files/tags/tytul_dziela/1.expected.html
+++ b/tests/files/tags/tytul_dziela/1.expected.html
@@ -4,5 +4,5 @@
Kilka uwag o „Hamlecie”
Szekspira
- pióra...
+ pióra…
diff --git a/tests/files/tags/wers_wciety/1.expected.html b/tests/files/tags/wers_wciety/1.expected.html
index 0ee7c74..b2254fb 100644
--- a/tests/files/tags/wers_wciety/1.expected.html
+++ b/tests/files/tags/wers_wciety/1.expected.html
@@ -1,6 +1,6 @@
-Julisz SłowackiAnioł ognisty — mój anioł lewy...
+Julisz SłowackiAnioł ognisty — mój anioł lewy…
1Anioł ognisty — mój anioł lewy
diff --git a/tests/files/tags/wers_wciety/2.expected.html b/tests/files/tags/wers_wciety/2.expected.html
index 85b8e22..f502985 100644
--- a/tests/files/tags/wers_wciety/2.expected.html
+++ b/tests/files/tags/wers_wciety/2.expected.html
@@ -1,8 +1,10 @@
-
1zwykły
-
wcięty
-
wcięty 1
-
wcięty 2
-
5wcięty 25
-
akapitowy
-
środek
-
do prawej
+
+
1zwykły
+
wcięty
+
wcięty 1
+
wcięty 2
+
5wcięty 25
+
akapitowy
+
środek
+
do prawej
+
diff --git a/tests/files/tags/wers_wciety/2.xml b/tests/files/tags/wers_wciety/2.xml
index c31d2cb..9ce9f6a 100644
--- a/tests/files/tags/wers_wciety/2.xml
+++ b/tests/files/tags/wers_wciety/2.xml
@@ -1,10 +1,10 @@
- zwykły
- wcięty
- wcięty 1
- wcięty 2
- wcięty 25
- akapitowy
- środek
+ zwykły/
+ wcięty/
+ wcięty 1/
+ wcięty 2/
+ wcięty 25/
+ akapitowy/
+ środek/
do prawej
diff --git a/tests/files/tags/wyroznienie/1.expected.html b/tests/files/tags/wyroznienie/1.expected.html
index 65cffe3..d160084 100644
--- a/tests/files/tags/wyroznienie/1.expected.html
+++ b/tests/files/tags/wyroznienie/1.expected.html
@@ -5,5 +5,5 @@
Blade jak świt,
5— Gdy życia koniec szepce do początku:
-„Nie stargam cię ja — nie! — Ja, u-wydatnię!...”
+
„Nie stargam cię ja — nie! — Ja, u-wydatnię!…”
diff --git a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
index cac61d8..29e243a 100644
--- a/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
+++ b/tests/files/text/asnyk_miedzy_nami_expected_raw.txt
@@ -1,5 +1,3 @@
-
-
Między nami nic nie było!
Żadnych zwierzeń, wyznań żadnych!
Nic nas z sobą nie łączyło —
diff --git a/tests/test_text.py b/tests/test_text.py
index 7797530..9109413 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -2,9 +2,7 @@
# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
#
import unittest
-from librarian import NoDublinCore
from librarian.builders import builders
-from librarian.parser import WLDocument as LegacyWLDocument
from librarian.document import WLDocument
from .utils import get_fixture
@@ -12,16 +10,6 @@ from .utils import get_fixture
class TextTests(unittest.TestCase):
maxDiff = None
- def test_transform_legacy(self):
- expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
-
- text = LegacyWLDocument.from_file(
- get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
- ).as_text().get_bytes().decode('utf-8')
-
- with open(expected_output_file_path, 'rb') as f:
- self.assertEqual(text, f.read().decode('utf-8'))
-
def test_transform(self):
expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
@@ -36,22 +24,9 @@ class TextTests(unittest.TestCase):
def test_transform_raw(self):
expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
- text = LegacyWLDocument.from_file(
- get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
- ).as_text(flags=['raw-text']).get_bytes().decode('utf-8')
+ text = WLDocument(
+ filename=get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+ ).build(builders['txt'], raw_text=True).get_bytes().decode('utf-8')
with open(expected_output_file_path, 'rb') as f:
self.assertEqual(text, f.read().decode('utf-8'))
-
- def test_no_dublincore(self):
- with self.assertRaises(NoDublinCore):
- LegacyWLDocument.from_file(
- get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
- ).as_text()
-
- def test_passing_parse_dublincore_to_transform(self):
- """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
- LegacyWLDocument.from_file(
- get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
- parse_dublincore=False,
- ).as_text()