From 7231c2b5e9364aefd27a0bc8a1033de90bb3d878 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Mon, 3 Apr 2023 08:08:50 +0200 Subject: [PATCH 1/1] assigning ids --- setup.py | 2 +- src/librarian/document.py | 28 +++++- src/librarian/elements/blocks/dedykacja.py | 2 + src/librarian/elements/drama/didaskalia.py | 2 + src/librarian/elements/drama/lista_osob.py | 1 + src/librarian/elements/drama/miejsce_czas.py | 3 +- .../elements/drama/naglowek_listy.py | 2 + .../elements/drama/naglowek_osoba.py | 2 + src/librarian/elements/figures/animacja.py | 2 + src/librarian/elements/figures/ilustr.py | 2 + src/librarian/elements/figures/tabela.py | 2 + src/librarian/elements/front/base.py | 1 + src/librarian/elements/front/motto.py | 2 + src/librarian/elements/front/motto_podpis.py | 1 + .../elements/headers/naglowek_czesc.py | 1 + .../elements/headers/naglowek_podrozdzial.py | 1 + .../elements/headers/naglowek_rozdzial.py | 1 + .../elements/headers/naglowek_scena.py | 1 + .../elements/headers/podtytul_czesc.py | 2 + .../elements/headers/podtytul_podrozdzial.py | 2 + .../elements/headers/podtytul_rozdzial.py | 2 + src/librarian/elements/paragraphs/akap.py | 1 + src/librarian/elements/poetry/strofa.py | 2 + src/librarian/xslt/book2html.xslt | 89 +++++++++++++++---- 24 files changed, 134 insertions(+), 20 deletions(-) diff --git a/setup.py b/setup.py index cdeb58f..c39d2bb 100755 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ def whole_tree(prefix, path): setup( name='librarian', - version='2.4.10', + version='2.4.11', description='Converter from WolneLektury.pl XML-based language to XHTML, TXT and other formats', author="Marek Stępniowski", author_email='marek@stepniowski.com', diff --git a/src/librarian/document.py b/src/librarian/document.py index fbaf6ca..ea8a47a 100644 --- a/src/librarian/document.py +++ b/src/librarian/document.py @@ -36,11 +36,37 @@ class WLDocument: filename=self.provider.by_slug(part_uri.slug), provider=self.provider ) - def build(self, builder, base_url=None, **kwargs): return builder(base_url=base_url).build(self, **kwargs) + def assign_ids(self): + # Find all existing IDs. + existing = set() + que = [self.tree.getroot()] + while que: + item = que.pop(0) + try: + item.normalize_insides() + except AttributeError: + pass + existing.add(item.attrib.get('id')) + que.extend(item) + + i = 1 + que = [self.tree.getroot()] + while que: + item = que.pop(0) + que.extend(item) + if item.attrib.get('id'): + continue + if not getattr(item, 'SHOULD_HAVE_ID', False): + continue + while f'e{i}' in existing: + i += 1 + item.attrib['id'] = f'e{i}' + i += 1 + def _compat_assign_ordered_ids(self): """ Compatibility: ids in document order, to be roughly compatible with legacy diff --git a/src/librarian/elements/blocks/dedykacja.py b/src/librarian/elements/blocks/dedykacja.py index 7ac809d..6cb68c0 100644 --- a/src/librarian/elements/blocks/dedykacja.py +++ b/src/librarian/elements/blocks/dedykacja.py @@ -2,6 +2,8 @@ from ..base import WLElement class Dedykacja(WLElement): + SHOULD_HAVE_ID = True + TXT_LEGACY_TOP_MARGIN = 2 EPUB_TAG = HTML_TAG = "div" diff --git a/src/librarian/elements/drama/didaskalia.py b/src/librarian/elements/drama/didaskalia.py index bf81b69..11ae486 100644 --- a/src/librarian/elements/drama/didaskalia.py +++ b/src/librarian/elements/drama/didaskalia.py @@ -2,6 +2,8 @@ from ..base import WLElement class Didaskalia(WLElement): + SHOULD_HAVE_ID = True + TXT_TOP_PARGIN = 2 TXT_BOTTOM_MARGIN = 2 TXT_LEGACY_TOP_MARGIN = 2 diff --git a/src/librarian/elements/drama/lista_osob.py b/src/librarian/elements/drama/lista_osob.py index 269b05c..8895aa8 100644 --- a/src/librarian/elements/drama/lista_osob.py +++ b/src/librarian/elements/drama/lista_osob.py @@ -4,6 +4,7 @@ from ..base import WLElement class ListaOsob(WLElement): CAN_HAVE_TEXT = False + SHOULD_HAVE_ID = True TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 3 diff --git a/src/librarian/elements/drama/miejsce_czas.py b/src/librarian/elements/drama/miejsce_czas.py index cf47ed2..8908bea 100644 --- a/src/librarian/elements/drama/miejsce_czas.py +++ b/src/librarian/elements/drama/miejsce_czas.py @@ -2,7 +2,8 @@ from ..paragraphs import Akap class MiejsceCzas(Akap): - EPUB_CLASS = HTML_CLASS = 'place-and-time' + SHOULD_HAVE_ID = True EPUB_TAG = "div" + EPUB_CLASS = HTML_CLASS = 'place-and-time' diff --git a/src/librarian/elements/drama/naglowek_listy.py b/src/librarian/elements/drama/naglowek_listy.py index 1f164a4..2f3eec4 100644 --- a/src/librarian/elements/drama/naglowek_listy.py +++ b/src/librarian/elements/drama/naglowek_listy.py @@ -2,6 +2,8 @@ from ..base import WLElement class NaglowekListy(WLElement): + SHOULD_HAVE_ID = True + HTML_TAG = "h3" EPUB_TAG = "div" diff --git a/src/librarian/elements/drama/naglowek_osoba.py b/src/librarian/elements/drama/naglowek_osoba.py index afa16ce..b3f918e 100644 --- a/src/librarian/elements/drama/naglowek_osoba.py +++ b/src/librarian/elements/drama/naglowek_osoba.py @@ -2,6 +2,8 @@ from ..base import WLElement class NaglowekOsoba(WLElement): + SHOULD_HAVE_ID = True + TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 2 TXT_LEGACY_TOP_MARGIN = 3 diff --git a/src/librarian/elements/figures/animacja.py b/src/librarian/elements/figures/animacja.py index e98fa88..a32f9e0 100644 --- a/src/librarian/elements/figures/animacja.py +++ b/src/librarian/elements/figures/animacja.py @@ -2,6 +2,8 @@ from ..base import WLElement class Animacja(WLElement): + SHOULD_HAVE_ID = True + HTML_TAG = 'div' HTML_CLASS = "animacja cycle-slideshow" HTML_ATTR = { diff --git a/src/librarian/elements/figures/ilustr.py b/src/librarian/elements/figures/ilustr.py index af936fb..9b3b165 100644 --- a/src/librarian/elements/figures/ilustr.py +++ b/src/librarian/elements/figures/ilustr.py @@ -4,6 +4,8 @@ from ..base import WLElement class Ilustr(WLElement): + SHOULD_HAVE_ID = True + EPUB_TAG = HTML_TAG = 'img' def get_html_attr(self, builder): diff --git a/src/librarian/elements/figures/tabela.py b/src/librarian/elements/figures/tabela.py index 7da7877..ba02b20 100644 --- a/src/librarian/elements/figures/tabela.py +++ b/src/librarian/elements/figures/tabela.py @@ -2,6 +2,8 @@ from ..base import WLElement class Tabela(WLElement): + SHOULD_HAVE_ID = True + EPUB_TAG = HTML_TAG = 'table' def get_html_attr(self, builder): diff --git a/src/librarian/elements/front/base.py b/src/librarian/elements/front/base.py index 9e961df..90ec348 100644 --- a/src/librarian/elements/front/base.py +++ b/src/librarian/elements/front/base.py @@ -3,6 +3,7 @@ from ..base import WLElement class HeaderElement(WLElement): HTML_TAG = 'span' + SHOULD_HAVE_ID = True def txt_build(self, builder): builder.enter_fragment('header') diff --git a/src/librarian/elements/front/motto.py b/src/librarian/elements/front/motto.py index 7f23ea6..48eac66 100644 --- a/src/librarian/elements/front/motto.py +++ b/src/librarian/elements/front/motto.py @@ -2,6 +2,8 @@ from ..base import WLElement class Motto(WLElement): + SHOULD_HAVE_ID = True + TXT_LEGACY_TOP_MARGIN = 4 TXT_LEGACY_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/front/motto_podpis.py b/src/librarian/elements/front/motto_podpis.py index 8fee127..4b2ee0d 100644 --- a/src/librarian/elements/front/motto_podpis.py +++ b/src/librarian/elements/front/motto_podpis.py @@ -2,6 +2,7 @@ from ..base import WLElement class MottoPodpis(WLElement): + SHOULD_HAVE_ID = True HTML_TAG = "p" EPUB_CLASS = HTML_CLASS = "motto_podpis" diff --git a/src/librarian/elements/headers/naglowek_czesc.py b/src/librarian/elements/headers/naglowek_czesc.py index 829e4f4..9caceb0 100644 --- a/src/librarian/elements/headers/naglowek_czesc.py +++ b/src/librarian/elements/headers/naglowek_czesc.py @@ -3,6 +3,7 @@ from ..base import WLElement class NaglowekCzesc(WLElement): SECTION_PRECEDENCE = 1 + SHOULD_HAVE_ID = True TXT_TOP_MARGIN = 5 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/headers/naglowek_podrozdzial.py b/src/librarian/elements/headers/naglowek_podrozdzial.py index ee338eb..a7328f5 100644 --- a/src/librarian/elements/headers/naglowek_podrozdzial.py +++ b/src/librarian/elements/headers/naglowek_podrozdzial.py @@ -3,6 +3,7 @@ from ..base import WLElement class NaglowekPodrozdzial(WLElement): SECTION_PRECEDENCE = 3 + SHOULD_HAVE_ID = True TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/headers/naglowek_rozdzial.py b/src/librarian/elements/headers/naglowek_rozdzial.py index 33ff355..7492da4 100644 --- a/src/librarian/elements/headers/naglowek_rozdzial.py +++ b/src/librarian/elements/headers/naglowek_rozdzial.py @@ -3,6 +3,7 @@ from ..base import WLElement class NaglowekRozdzial(WLElement): SECTION_PRECEDENCE = 2 + SHOULD_HAVE_ID = True TXT_TOP_MARGIN = 4 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/headers/naglowek_scena.py b/src/librarian/elements/headers/naglowek_scena.py index 8a52ca2..e4e0fd1 100644 --- a/src/librarian/elements/headers/naglowek_scena.py +++ b/src/librarian/elements/headers/naglowek_scena.py @@ -3,6 +3,7 @@ from ..base import WLElement class NaglowekScena(WLElement): SECTION_PRECEDENCE = 2 + SHOULD_HAVE_ID = True TXT_TOP_MARGIN = 4 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/headers/podtytul_czesc.py b/src/librarian/elements/headers/podtytul_czesc.py index df8fd5c..405f2d6 100644 --- a/src/librarian/elements/headers/podtytul_czesc.py +++ b/src/librarian/elements/headers/podtytul_czesc.py @@ -2,6 +2,8 @@ from ..base import WLElement class PodtytulCzesc(WLElement): + SHOULD_HAVE_ID = True + TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/headers/podtytul_podrozdzial.py b/src/librarian/elements/headers/podtytul_podrozdzial.py index cc00207..d6fc2ec 100644 --- a/src/librarian/elements/headers/podtytul_podrozdzial.py +++ b/src/librarian/elements/headers/podtytul_podrozdzial.py @@ -2,6 +2,8 @@ from ..base import WLElement class PodtytulPodrozdzial(WLElement): + SHOULD_HAVE_ID = True + TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/headers/podtytul_rozdzial.py b/src/librarian/elements/headers/podtytul_rozdzial.py index f8db548..e30a5e2 100644 --- a/src/librarian/elements/headers/podtytul_rozdzial.py +++ b/src/librarian/elements/headers/podtytul_rozdzial.py @@ -2,6 +2,8 @@ from ..base import WLElement class PodtytulRozdzial(WLElement): + SHOULD_HAVE_ID = True + TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/paragraphs/akap.py b/src/librarian/elements/paragraphs/akap.py index 18f69d7..004f8ec 100644 --- a/src/librarian/elements/paragraphs/akap.py +++ b/src/librarian/elements/paragraphs/akap.py @@ -3,6 +3,7 @@ from ..base import WLElement class Akap(WLElement): STRIP = True + SHOULD_HAVE_ID = True TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 diff --git a/src/librarian/elements/poetry/strofa.py b/src/librarian/elements/poetry/strofa.py index a843d20..e925571 100644 --- a/src/librarian/elements/poetry/strofa.py +++ b/src/librarian/elements/poetry/strofa.py @@ -5,6 +5,8 @@ from .wers import Wers class Strofa(WLElement): + SHOULD_HAVE_ID = True + TXT_TOP_MARGIN = 2 TXT_BOTTOM_MARGIN = 2 TXT_LEGACY_TOP_MARGIN = 1 diff --git a/src/librarian/xslt/book2html.xslt b/src/librarian/xslt/book2html.xslt index 2057326..c0567e8 100644 --- a/src/librarian/xslt/book2html.xslt +++ b/src/librarian/xslt/book2html.xslt @@ -185,7 +185,10 @@ -
+
+ + +
@@ -194,6 +197,7 @@
+ ilustr @@ -238,6 +242,7 @@
+
@@ -249,64 +254,94 @@ - + + + + - + + + + - + + + + - + + + + -

+

+ + +

+
-

+

+ + +

+
- -

+ +

+ + +

+
- -

+ +

+ + +

- -
+ +
+ + +
@@ -315,6 +350,7 @@

+

@@ -325,7 +361,8 @@
-
+
+ @@ -387,17 +424,26 @@ -

+

+ + +

-
+ + + +
-
+ + + +
@@ -611,6 +657,15 @@ + + + + wl- + + + + + -- 2.20.1