From: Radek Czajka Date: Tue, 22 Oct 2024 13:59:19 +0000 (+0200) Subject: wip change fb2 api X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/23d025c8875cca1404f274aca7170c9db5e980e7?ds=inline wip change fb2 api --- diff --git a/src/librarian/__init__.py b/src/librarian/__init__.py index 0ed908c..4fbed92 100644 --- a/src/librarian/__init__.py +++ b/src/librarian/__init__.py @@ -77,7 +77,8 @@ RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/') XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml") PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/") - +FB2NS = XMLNamespace("http://www.gribuser.ru/xml/fictionbook/2.0") +XLINKNS = XMLNamespace("http://www.w3.org/1999/xlink") WLNS = EmptyNamespace() diff --git a/src/librarian/builders/__init__.py b/src/librarian/builders/__init__.py index e47c557..85f8427 100644 --- a/src/librarian/builders/__init__.py +++ b/src/librarian/builders/__init__.py @@ -9,6 +9,7 @@ from .daisy import DaisyBuilder from .epub import EpubBuilder from .mobi import MobiBuilder from .pdf import PdfBuilder +from .fb2 import FB2Builder builders = OrderedDict([ @@ -23,4 +24,5 @@ builders = OrderedDict([ ("epub", EpubBuilder), ("mobi", MobiBuilder), ("pdf", PdfBuilder), + ("fb2", FB2Builder), ]) diff --git a/src/librarian/builders/fb2.py b/src/librarian/builders/fb2.py new file mode 100644 index 0000000..f4fe635 --- /dev/null +++ b/src/librarian/builders/fb2.py @@ -0,0 +1,173 @@ +# This file is part of Librarian, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Wolne Lektury. See NOTICE for more information. +# +from lxml import etree +from librarian import FB2NS, XLINKNS, OutputFile +from .html import TreeBuilder + + +class FB2Builder(TreeBuilder): + file_extension = 'fb2' + build_method_fn = 'fb2_build' + orphans = False + + def __init__(self, base_url=None): + self.tree = etree.Element( + FB2NS('FictionBook'), + nsmap={ + None: FB2NS.uri, + 'l': XLINKNS.uri, + } + ) + description = etree.SubElement(self.tree, 'description') + self.body = etree.SubElement(self.tree, 'body') + self.header = etree.SubElement(self.body, 'title') + self.epigraph = etree.SubElement(self.body, 'epigraph') + self.text = etree.SubElement(self.body, 'section') + + self.footnotes = etree.Element(FB2NS('body'), name="notes") + self.sections = [] + + self.cursors = { + None: self.text, + 'meta': description, + 'header': self.header, + 'epigraph': self.epigraph, + 'footnotes': self.footnotes, + #'nota_red': self.nota_red, + } + self.current_cursors = [self.text] + self.add_epigraph() + + def start_section(self, precedence): + while self.sections and self.sections[-1] >= precedence: + self.end_element() + self.sections.pop() + self.start_element('section') + self.sections.append(precedence) + + def add_epigraph(self): + self.enter_fragment('epigraph') + self.start_element(FB2NS('p')) + self.push_text('Utwór opracowany został w\xa0ramach projektu ') + self.start_element(FB2NS('a'), {XLINKNS('href'): 'https://wolnelektury.pl/'}) + self.push_text('Wolne Lektury') + self.end_element() + self.push_text(' przez ') + self.start_element(FB2NS('a'), {XLINKNS('href'): 'https://fundacja.wolnelektury.pl/'}) + self.push_text('fundację Wolne Lektury') + self.end_element() + self.push_text('.') + self.end_element() + self.exit_fragment() + + def add_meta(self, doc): + self.enter_fragment('meta') + + self.start_element('title-info') + + self.start_element('genre') + self.push_text('literature') + self.end_element() + for author in doc.meta.authors: + self.start_element('author') + self.simple_element('first-name', ' '.join(author.first_names)) + self.simple_element('last-name', author.last_name) + self.end_element() + self.simple_element('book-title', doc.meta.title) + if doc.meta.released_to_public_domain_at: + self.simple_element('date', doc.meta.released_to_public_domain_at) + self.simple_element('lang', doc.meta.language) + + self.end_element() + + self.start_element('document-info') + # contributor.editor + # contributor.technical_editor + self.simple_element('program-used', 'Wolne Lektury Librarian') + self.simple_element('date', doc.meta.created_at) + self.simple_element('id', str(doc.meta.url)) + self.simple_element('version', '0') + + self.end_element() + self.start_element('publish-info') + self.simple_element('publisher', '; '.join(doc.meta.publisher)) + self.end_element() + self.exit_fragment() + + def build(self, doc, mp3=None): + self.add_meta(doc) + doc.tree.getroot().fb2_build(self) + return self.output() + + def output(self): + return OutputFile.from_bytes( + etree.tostring( + self.tree, + encoding='utf-8', + pretty_print=True, + xml_declaration=True, + ) + ) + + + + +''' +import os.path +from copy import deepcopy +from lxml import etree + +from librarian import functions, OutputFile +from .epub import replace_by_verse + + +functions.reg_substitute_entities() +functions.reg_person_name() + + +def sectionify(tree): + """Finds section headers and adds a tree of _section tags.""" + sections = [ + 'naglowek_czesc', + 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', + 'naglowek_podrozdzial'] + section_level = dict((v, k) for (k, v) in enumerate(sections)) + + # We can assume there are just subelements an no text at section level. + for level, section_name in reversed(list(enumerate(sections))): + for header in tree.findall('//' + section_name): + section = header.makeelement("_section") + header.addprevious(section) + section.append(header) + sibling = section.getnext() + while (sibling is not None and + section_level.get(sibling.tag, 1000) > level): + section.append(sibling) + sibling = section.getnext() + + +def transform(wldoc, verbose=False, + cover=None, flags=None): + document = deepcopy(wldoc) + del wldoc + + if flags: + for flag in flags: + document.edoc.getroot().set(flag, 'yes') + + document.clean_ed_note() + document.clean_ed_note('abstrakt') + + style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt') + style = etree.parse(style_filename) + + replace_by_verse(document.edoc) + sectionify(document.edoc) + + result = document.transform(style) + + return OutputFile.from_bytes(str(result).encode('utf-8')) + +# vim:et +''' diff --git a/src/librarian/builders/html.py b/src/librarian/builders/html.py index 66db675..e6530a5 100644 --- a/src/librarian/builders/html.py +++ b/src/librarian/builders/html.py @@ -45,6 +45,11 @@ class TreeBuilder: else: cursor.text = (cursor.text or '') + text + def simple_element(self, tag, text='', attrib=None): + self.start_element(tag, attrib) + self.push_text(text) + self.end_element() + class HtmlBuilder(TreeBuilder): build_method_fn = 'html_build' diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py index 1f22929..2d656ae 100644 --- a/src/librarian/elements/base.py +++ b/src/librarian/elements/base.py @@ -37,7 +37,9 @@ class WLElement(etree.ElementBase): EPUB_ATTR = {} EPUB_CLASS = None EPUB_START_CHUNK = False - + + FB2_TAG = None + CAN_HAVE_TEXT = True STRIP = False NUMBERING = None @@ -209,6 +211,25 @@ class WLElement(etree.ElementBase): if self.HTML_TAG: builder.end_element() + def fb2_build(self, builder): + if self.SECTION_PRECEDENCE: + builder.start_section(self.SECTION_PRECEDENCE) + builder.start_element('title') + builder.start_element('p') + + if self.FB2_TAG: + builder.start_element( + self.FB2_TAG, + #self.get_fb2_attr(builder), + ) + + self.build_inner(builder) + if self.FB2_TAG: + builder.end_element() + if self.SECTION_PRECEDENCE: + builder.end_element() + builder.end_element() + def epub_build_inner(self, builder): self.build_inner(builder) diff --git a/src/librarian/elements/blocks/dedykacja.py b/src/librarian/elements/blocks/dedykacja.py index ae00b72..a8e7406 100644 --- a/src/librarian/elements/blocks/dedykacja.py +++ b/src/librarian/elements/blocks/dedykacja.py @@ -12,3 +12,5 @@ class Dedykacja(WLElement): EPUB_TAG = HTML_TAG = "div" EPUB_CLASS = HTML_CLASS = "dedication" + + FB2_TAG = 'cite' diff --git a/src/librarian/elements/blocks/dlugi_cytat.py b/src/librarian/elements/blocks/dlugi_cytat.py index bdac225..cf1515f 100644 --- a/src/librarian/elements/blocks/dlugi_cytat.py +++ b/src/librarian/elements/blocks/dlugi_cytat.py @@ -14,3 +14,5 @@ class DlugiCytat(WLElement): EPUB_TAG = 'div' EPUB_CLASS = 'block' + + FB2_TAG = 'cite' diff --git a/src/librarian/elements/blocks/nota.py b/src/librarian/elements/blocks/nota.py index dbba883..6bc0ff3 100644 --- a/src/librarian/elements/blocks/nota.py +++ b/src/librarian/elements/blocks/nota.py @@ -10,4 +10,6 @@ class Nota(WLElement): EPUB_TAG = HTML_TAG = "div" EPUB_CLASS = HTML_CLASS = "note" + FB2_TAG = 'cite' + SUPPRESS_NUMBERING = {'main': 'i'} diff --git a/src/librarian/elements/figures/kol.py b/src/librarian/elements/figures/kol.py index fa09fc7..342f89a 100644 --- a/src/librarian/elements/figures/kol.py +++ b/src/librarian/elements/figures/kol.py @@ -5,5 +5,5 @@ from ..base import WLElement class Kol(WLElement): - EPUB_TAG = HTML_TAG = 'td' + EPUB_TAG = HTML_TAG = FB2_TAG = 'td' TXT_PREFIX = ' ' * 4 diff --git a/src/librarian/elements/figures/tabela.py b/src/librarian/elements/figures/tabela.py index 5ab04e0..5422b61 100644 --- a/src/librarian/elements/figures/tabela.py +++ b/src/librarian/elements/figures/tabela.py @@ -11,7 +11,7 @@ class Tabela(WLElement): TXT_TOP_MARGIN = 3 TXT_BOTTOM_MARGIN = 3 - EPUB_TAG = HTML_TAG = 'table' + EPUB_TAG = HTML_TAG = FB2_TAG = 'table' def get_html_attr(self, builder): if self.attrib.get('ramka', '') == '1': diff --git a/src/librarian/elements/figures/wiersz.py b/src/librarian/elements/figures/wiersz.py index c013558..7b57997 100644 --- a/src/librarian/elements/figures/wiersz.py +++ b/src/librarian/elements/figures/wiersz.py @@ -6,6 +6,6 @@ from ..base import WLElement class Wiersz(WLElement): CAN_HAVE_TEXT = False - EPUB_TAG = HTML_TAG = 'tr' + EPUB_TAG = HTML_TAG = FB2_TAG = 'tr' TXT_TOP_MARGIN = 1 TXT_BOTTOM_MARGIN = 1 diff --git a/src/librarian/elements/front/base.py b/src/librarian/elements/front/base.py index ad16ca3..a0fb305 100644 --- a/src/librarian/elements/front/base.py +++ b/src/librarian/elements/front/base.py @@ -7,13 +7,19 @@ from ..base import WLElement class HeaderElement(WLElement): NUMBERING = 'i' HTML_TAG = 'span' + FB2_TAG = 'p' def txt_build(self, builder): builder.enter_fragment('header') - super(HeaderElement, self).txt_build(builder) + super().txt_build(builder) builder.exit_fragment() def html_build(self, builder): builder.enter_fragment('header') - super(HeaderElement, self).html_build(builder) + super().html_build(builder) + builder.exit_fragment() + + def fb2_build(self, builder): + builder.enter_fragment('header') + super().fb2_build(builder) builder.exit_fragment() diff --git a/src/librarian/elements/front/motto.py b/src/librarian/elements/front/motto.py index ac9f209..569a728 100644 --- a/src/librarian/elements/front/motto.py +++ b/src/librarian/elements/front/motto.py @@ -12,3 +12,5 @@ class Motto(WLElement): EPUB_TAG = HTML_TAG = "div" EPUB_CLASS = HTML_CLASS = "motto" + + FB2_TAG = 'cite' diff --git a/src/librarian/elements/front/motto_podpis.py b/src/librarian/elements/front/motto_podpis.py index 199da84..fd0d101 100644 --- a/src/librarian/elements/front/motto_podpis.py +++ b/src/librarian/elements/front/motto_podpis.py @@ -12,3 +12,5 @@ class MottoPodpis(WLElement): EPUB_TAG = "div" EPUB_CLASS = "motto_podpis" + + FB2_TAG = 'p' diff --git a/src/librarian/elements/paragraphs/akap.py b/src/librarian/elements/paragraphs/akap.py index 6ee10d7..726bfcd 100644 --- a/src/librarian/elements/paragraphs/akap.py +++ b/src/librarian/elements/paragraphs/akap.py @@ -16,6 +16,8 @@ class Akap(WLElement): HTML_TAG = 'p' HTML_CLASS = 'wl paragraph' + FB2_TAG = 'p' + has_visible_numbering = True @property diff --git a/src/librarian/elements/poetry/strofa.py b/src/librarian/elements/poetry/strofa.py index f698bc3..93e3a4a 100644 --- a/src/librarian/elements/poetry/strofa.py +++ b/src/librarian/elements/poetry/strofa.py @@ -16,6 +16,9 @@ class Strofa(WLElement): EPUB_TAG = HTML_TAG = 'div' EPUB_CLASS = HTML_CLASS = 'stanza' + FB2_TAG = 'stanza' + + def epub_build(self, builder): super().epub_build(builder) builder.start_element( diff --git a/src/librarian/elements/poetry/wers.py b/src/librarian/elements/poetry/wers.py index f0f274c..c188845 100644 --- a/src/librarian/elements/poetry/wers.py +++ b/src/librarian/elements/poetry/wers.py @@ -13,6 +13,7 @@ class Wers(WLElement): EPUB_TAG = HTML_TAG = 'div' EPUB_CLASS = 'verse' HTML_CLASS = 'wl verse' + FB2_TAG = 'v' NUMBERING = 'main' diff --git a/src/librarian/elements/separators/sekcja_asterysk.py b/src/librarian/elements/separators/sekcja_asterysk.py index 8cc2019..9c012cf 100644 --- a/src/librarian/elements/separators/sekcja_asterysk.py +++ b/src/librarian/elements/separators/sekcja_asterysk.py @@ -19,3 +19,8 @@ class SekcjaAsterysk(WLElement): epub_build_inner = html_build_inner + def fb2_build(self, builder): + builder.simple_element('empty-line') + builder.simple_element('p', '*') + builder.simple_element('empty-line') + diff --git a/src/librarian/elements/separators/sekcja_swiatlo.py b/src/librarian/elements/separators/sekcja_swiatlo.py index f7ea4a3..edfff1f 100644 --- a/src/librarian/elements/separators/sekcja_swiatlo.py +++ b/src/librarian/elements/separators/sekcja_swiatlo.py @@ -15,3 +15,7 @@ class SekcjaSwiatlo(WLElement): def epub_build_inner(self, builder): builder.push_text("\u00a0") + + def fb2_build(self, builder): + for i in range(3): + builder.simple_element('empty-line') diff --git a/src/librarian/elements/separators/separator_linia.py b/src/librarian/elements/separators/separator_linia.py index ac01f5d..e5b0709 100644 --- a/src/librarian/elements/separators/separator_linia.py +++ b/src/librarian/elements/separators/separator_linia.py @@ -14,4 +14,8 @@ class SeparatorLinia(WLElement): def txt_build_inner(self, builder): builder.push_text('-' * 48) + def fb2_build(self, builder): + builder.simple_element('empty-line') + builder.simple_element('p', '—' * 8) + builder.simple_element('empty-line')