wip change fb2 api
authorRadek Czajka <rczajka@rczajka.pl>
Tue, 22 Oct 2024 13:59:19 +0000 (15:59 +0200)
committerRadek Czajka <rczajka@rczajka.pl>
Tue, 22 Oct 2024 13:59:19 +0000 (15:59 +0200)
20 files changed:
src/librarian/__init__.py
src/librarian/builders/__init__.py
src/librarian/builders/fb2.py [new file with mode: 0644]
src/librarian/builders/html.py
src/librarian/elements/base.py
src/librarian/elements/blocks/dedykacja.py
src/librarian/elements/blocks/dlugi_cytat.py
src/librarian/elements/blocks/nota.py
src/librarian/elements/figures/kol.py
src/librarian/elements/figures/tabela.py
src/librarian/elements/figures/wiersz.py
src/librarian/elements/front/base.py
src/librarian/elements/front/motto.py
src/librarian/elements/front/motto_podpis.py
src/librarian/elements/paragraphs/akap.py
src/librarian/elements/poetry/strofa.py
src/librarian/elements/poetry/wers.py
src/librarian/elements/separators/sekcja_asterysk.py
src/librarian/elements/separators/sekcja_swiatlo.py
src/librarian/elements/separators/separator_linia.py

index 0ed908c..4fbed92 100644 (file)
@@ -77,7 +77,8 @@ RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
 DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
 XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
 PLMETNS = XMLNamespace("http://dl.psnc.pl/schemas/plmet/")
-
+FB2NS = XMLNamespace("http://www.gribuser.ru/xml/fictionbook/2.0")
+XLINKNS = XMLNamespace("http://www.w3.org/1999/xlink")
 WLNS = EmptyNamespace()
 
 
index e47c557..85f8427 100644 (file)
@@ -9,6 +9,7 @@ from .daisy import DaisyBuilder
 from .epub import EpubBuilder
 from .mobi import MobiBuilder
 from .pdf import PdfBuilder
+from .fb2 import FB2Builder
 
 
 builders = OrderedDict([
@@ -23,4 +24,5 @@ builders = OrderedDict([
     ("epub", EpubBuilder),
     ("mobi", MobiBuilder),
     ("pdf", PdfBuilder),
+    ("fb2", FB2Builder),
 ])
diff --git a/src/librarian/builders/fb2.py b/src/librarian/builders/fb2.py
new file mode 100644 (file)
index 0000000..f4fe635
--- /dev/null
@@ -0,0 +1,173 @@
+# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
+#
+from lxml import etree
+from librarian import FB2NS, XLINKNS, OutputFile
+from .html import TreeBuilder
+
+
+class FB2Builder(TreeBuilder):
+    """Builder which assembles a FictionBook 2 (FB2) XML document.
+
+    The document is an lxml tree rooted at ``FictionBook``.  Named fragments
+    ('meta', 'header', 'epigraph', 'footnotes') are registered in
+    ``self.cursors``; the default fragment (``None``) is the main text
+    ``section``.
+    """
+    file_extension = 'fb2'
+    build_method_fn = 'fb2_build'
+    orphans = False
+
+    def __init__(self, base_url=None):
+        """Create the document skeleton and write the standard epigraph.
+
+        ``base_url`` is accepted but not used by this builder.
+        """
+        # NOTE(review): TreeBuilder.__init__ is not called here -- confirm
+        # the base class needs no initialisation of its own.
+        self.tree = etree.Element(
+            FB2NS('FictionBook'),
+            nsmap={
+                None: FB2NS.uri,
+                'l': XLINKNS.uri,
+            }
+        )
+        description = etree.SubElement(self.tree, 'description')
+        self.body = etree.SubElement(self.tree, 'body')
+        self.header = etree.SubElement(self.body, 'title')
+        self.epigraph = etree.SubElement(self.body, 'epigraph')
+        self.text = etree.SubElement(self.body, 'section')
+
+        # Created detached; attached to the tree in build() if it gets any
+        # content.
+        self.footnotes = etree.Element(FB2NS('body'), name="notes")
+        # Stack of precedences of the currently open sections.
+        self.sections = []
+
+        self.cursors = {
+            None: self.text,
+            'meta': description,
+            'header': self.header,
+            'epigraph': self.epigraph,
+            'footnotes': self.footnotes,
+            #'nota_red': self.nota_red,
+        }
+        self.current_cursors = [self.text]
+        self.add_epigraph()
+
+    def start_section(self, precedence):
+        """Open a new ``section`` element at the given precedence.
+
+        Any open sections whose recorded precedence is greater than or equal
+        to ``precedence`` are closed first.
+        """
+        while self.sections and self.sections[-1] >= precedence:
+            self.end_element()
+            self.sections.pop()
+        self.start_element('section')
+        self.sections.append(precedence)
+
+    def add_epigraph(self):
+        """Write the Wolne Lektury credit paragraph into the epigraph."""
+        self.enter_fragment('epigraph')
+        self.start_element(FB2NS('p'))
+        self.push_text('Utwór opracowany został w\xa0ramach projektu ')
+        self.start_element(
+            FB2NS('a'), {XLINKNS('href'): 'https://wolnelektury.pl/'}
+        )
+        self.push_text('Wolne Lektury')
+        self.end_element()
+        self.push_text(' przez ')
+        self.start_element(
+            FB2NS('a'),
+            {XLINKNS('href'): 'https://fundacja.wolnelektury.pl/'}
+        )
+        self.push_text('fundację Wolne Lektury')
+        self.end_element()
+        self.push_text('.')
+        self.end_element()
+        self.exit_fragment()
+
+    def add_meta(self, doc):
+        """Fill the ``description`` fragment from ``doc.meta``.
+
+        Writes the ``title-info``, ``document-info`` and ``publish-info``
+        elements.
+        """
+        meta = doc.meta
+        self.enter_fragment('meta')
+
+        self.start_element('title-info')
+        self.simple_element('genre', 'literature')
+        for author in meta.authors:
+            self.start_element('author')
+            self.simple_element('first-name', ' '.join(author.first_names))
+            self.simple_element('last-name', author.last_name)
+            self.end_element()
+        self.simple_element('book-title', meta.title)
+        # Metadata values are not guaranteed to be plain strings (the url
+        # below is already wrapped in str()), and element text is appended
+        # by string concatenation, so dates are converted explicitly.
+        if meta.released_to_public_domain_at:
+            self.simple_element(
+                'date', str(meta.released_to_public_domain_at)
+            )
+        self.simple_element('lang', meta.language)
+        self.end_element()
+
+        self.start_element('document-info')
+        # contributor.editor
+        # contributor.technical_editor
+        self.simple_element('program-used', 'Wolne Lektury Librarian')
+        self.simple_element('date', str(meta.created_at))
+        self.simple_element('id', str(meta.url))
+        self.simple_element('version', '0')
+        self.end_element()
+
+        self.start_element('publish-info')
+        self.simple_element('publisher', '; '.join(meta.publisher))
+        self.end_element()
+
+        self.exit_fragment()
+
+    def build(self, doc, mp3=None):
+        """Build ``doc`` into FB2 and return the serialized output.
+
+        ``mp3`` is accepted but not used by this builder.
+        """
+        self.add_meta(doc)
+        doc.tree.getroot().fb2_build(self)
+        self._attach_footnotes()
+        return self.output()
+
+    def _attach_footnotes(self):
+        """Append the notes body to the document if it has any content."""
+        # The notes body is created detached in __init__; without this step
+        # anything written to the 'footnotes' fragment would never be
+        # serialized.  An empty notes body is left out of the output.
+        has_content = len(self.footnotes) or self.footnotes.text
+        if has_content and self.footnotes.getparent() is None:
+            self.tree.append(self.footnotes)
+
+    def output(self):
+        """Serialize the tree as pretty-printed UTF-8 XML in an OutputFile."""
+        return OutputFile.from_bytes(
+            etree.tostring(
+                self.tree,
+                encoding='utf-8',
+                pretty_print=True,
+                xml_declaration=True,
+            )
+        )
+
+
+
+
+# NOTE: the string literal below is a bare expression statement -- it is
+# never assigned or used.  It appears to hold the earlier XSLT-based FB2
+# implementation (sectionify / transform), kept for reference only.
+# TODO: remove once the tree-builder implementation above is complete.
+'''
+import os.path
+from copy import deepcopy
+from lxml import etree
+
+from librarian import functions, OutputFile
+from .epub import replace_by_verse
+
+
+functions.reg_substitute_entities()
+functions.reg_person_name()
+
+
+def sectionify(tree):
+    """Finds section headers and adds a tree of _section tags."""
+    sections = [
+        'naglowek_czesc',
+        'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena',
+        'naglowek_podrozdzial']
+    section_level = dict((v, k) for (k, v) in enumerate(sections))
+
+    # We can assume there are just subelements an no text at section level.
+    for level, section_name in reversed(list(enumerate(sections))):
+        for header in tree.findall('//' + section_name):
+            section = header.makeelement("_section")
+            header.addprevious(section)
+            section.append(header)
+            sibling = section.getnext()
+            while (sibling is not None and
+                    section_level.get(sibling.tag, 1000) > level):
+                section.append(sibling)
+                sibling = section.getnext()
+
+
+def transform(wldoc, verbose=False,
+              cover=None, flags=None):
+    document = deepcopy(wldoc)
+    del wldoc
+
+    if flags:
+        for flag in flags:
+            document.edoc.getroot().set(flag, 'yes')
+
+    document.clean_ed_note()
+    document.clean_ed_note('abstrakt')
+
+    style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt')
+    style = etree.parse(style_filename)
+
+    replace_by_verse(document.edoc)
+    sectionify(document.edoc)
+
+    result = document.transform(style)
+
+    return OutputFile.from_bytes(str(result).encode('utf-8'))
+
+# vim:et
+'''
index 66db675..e6530a5 100644 (file)
@@ -45,6 +45,11 @@ class TreeBuilder:
         else:
             cursor.text = (cursor.text or '') + text
 
+    def simple_element(self, tag, text='', attrib=None):
+        """Write a complete element: start it, push ``text``, then end it.
+
+        ``tag`` and ``attrib`` are passed straight to ``start_element``;
+        ``text`` (empty by default) is passed to ``push_text``.
+        """
+        self.start_element(tag, attrib)
+        self.push_text(text)
+        self.end_element()
+
 
 class HtmlBuilder(TreeBuilder):
     build_method_fn = 'html_build'
index 1f22929..2d656ae 100644 (file)
@@ -37,7 +37,9 @@ class WLElement(etree.ElementBase):
     EPUB_ATTR = {}
     EPUB_CLASS = None
     EPUB_START_CHUNK = False
-   
+
+    FB2_TAG = None
+
     CAN_HAVE_TEXT = True
     STRIP = False
     NUMBERING = None
@@ -209,6 +211,25 @@ class WLElement(etree.ElementBase):
         if self.HTML_TAG:
             builder.end_element()
 
+    def fb2_build(self, builder):
+        """Emit this element into an FB2 builder.
+
+        If the element has a SECTION_PRECEDENCE, a new section is started
+        and the element's content is wrapped in ``title``/``p``.  If it has
+        an FB2_TAG, the content is additionally wrapped in that tag.
+        """
+        if self.SECTION_PRECEDENCE:
+            builder.start_section(self.SECTION_PRECEDENCE)
+            builder.start_element('title')
+            builder.start_element('p')
+
+        if self.FB2_TAG:
+            builder.start_element(
+                self.FB2_TAG,
+                #self.get_fb2_attr(builder),
+            )
+
+        self.build_inner(builder)
+        if self.FB2_TAG:
+            builder.end_element()
+        if self.SECTION_PRECEDENCE:
+            # Closes 'p' and 'title' only; the section opened by
+            # start_section() is not closed here.
+            builder.end_element()
+            builder.end_element()
+
     def epub_build_inner(self, builder):
         self.build_inner(builder)
 
index ae00b72..a8e7406 100644 (file)
@@ -12,3 +12,5 @@ class Dedykacja(WLElement):
 
     EPUB_TAG = HTML_TAG = "div"
     EPUB_CLASS = HTML_CLASS = "dedication"
+
+    FB2_TAG = 'cite'
index bdac225..cf1515f 100644 (file)
@@ -14,3 +14,5 @@ class DlugiCytat(WLElement):
 
     EPUB_TAG = 'div'
     EPUB_CLASS = 'block'
+
+    FB2_TAG = 'cite'
index dbba883..6bc0ff3 100644 (file)
@@ -10,4 +10,6 @@ class Nota(WLElement):
     EPUB_TAG = HTML_TAG = "div"
     EPUB_CLASS = HTML_CLASS = "note"
 
+    FB2_TAG = 'cite'
+
     SUPPRESS_NUMBERING = {'main': 'i'}
index fa09fc7..342f89a 100644 (file)
@@ -5,5 +5,5 @@ from ..base import WLElement
 
 
 class Kol(WLElement):
-    EPUB_TAG = HTML_TAG = 'td'
+    EPUB_TAG = HTML_TAG = FB2_TAG = 'td'
     TXT_PREFIX = ' ' * 4
index 5ab04e0..5422b61 100644 (file)
@@ -11,7 +11,7 @@ class Tabela(WLElement):
     TXT_TOP_MARGIN = 3
     TXT_BOTTOM_MARGIN = 3
 
-    EPUB_TAG = HTML_TAG = 'table'
+    EPUB_TAG = HTML_TAG = FB2_TAG = 'table'
 
     def get_html_attr(self, builder):
         if self.attrib.get('ramka', '') == '1':
index c013558..7b57997 100644 (file)
@@ -6,6 +6,6 @@ from ..base import WLElement
 
 class Wiersz(WLElement):
     CAN_HAVE_TEXT = False
-    EPUB_TAG = HTML_TAG = 'tr'
+    EPUB_TAG = HTML_TAG = FB2_TAG = 'tr'
     TXT_TOP_MARGIN = 1
     TXT_BOTTOM_MARGIN = 1
index ad16ca3..a0fb305 100644 (file)
@@ -7,13 +7,19 @@ from ..base import WLElement
 class HeaderElement(WLElement):
     NUMBERING = 'i'
     HTML_TAG = 'span'
+    FB2_TAG = 'p'
     
     def txt_build(self, builder):
         builder.enter_fragment('header')
-        super(HeaderElement, self).txt_build(builder)
+        super().txt_build(builder)
         builder.exit_fragment()
 
     def html_build(self, builder):
         builder.enter_fragment('header')
-        super(HeaderElement, self).html_build(builder)
+        super().html_build(builder)
+        builder.exit_fragment()
+
+    def fb2_build(self, builder):
+        """Run the default FB2 build inside the builder's 'header' fragment."""
+        builder.enter_fragment('header')
+        super().fb2_build(builder)
         builder.exit_fragment()
index ac9f209..569a728 100644 (file)
@@ -12,3 +12,5 @@ class Motto(WLElement):
 
     EPUB_TAG = HTML_TAG = "div"
     EPUB_CLASS = HTML_CLASS = "motto"
+
+    FB2_TAG = 'cite'
index 199da84..fd0d101 100644 (file)
@@ -12,3 +12,5 @@ class MottoPodpis(WLElement):
 
     EPUB_TAG = "div"
     EPUB_CLASS = "motto_podpis"
+
+    FB2_TAG = 'p'
index 6ee10d7..726bfcd 100644 (file)
@@ -16,6 +16,8 @@ class Akap(WLElement):
     HTML_TAG = 'p'
     HTML_CLASS = 'wl paragraph'
 
+    FB2_TAG = 'p'
+
     has_visible_numbering = True
 
     @property
index f698bc3..93e3a4a 100644 (file)
@@ -16,6 +16,9 @@ class Strofa(WLElement):
     EPUB_TAG = HTML_TAG = 'div'
     EPUB_CLASS = HTML_CLASS = 'stanza'
 
+    FB2_TAG = 'stanza'
+    
+
     def epub_build(self, builder):
         super().epub_build(builder)
         builder.start_element(
index f0f274c..c188845 100644 (file)
@@ -13,6 +13,7 @@ class Wers(WLElement):
     EPUB_TAG = HTML_TAG = 'div'
     EPUB_CLASS = 'verse'
     HTML_CLASS = 'wl verse'
+    FB2_TAG = 'v'
 
     NUMBERING = 'main'
 
index 8cc2019..9c012cf 100644 (file)
@@ -19,3 +19,8 @@ class SekcjaAsterysk(WLElement):
 
     epub_build_inner = html_build_inner
 
+    def fb2_build(self, builder):
+        """Emit an empty line, a paragraph containing '*', and an empty line."""
+        builder.simple_element('empty-line')
+        builder.simple_element('p', '*')
+        builder.simple_element('empty-line')
+
index f7ea4a3..edfff1f 100644 (file)
@@ -15,3 +15,7 @@ class SekcjaSwiatlo(WLElement):
 
     def epub_build_inner(self, builder):
         builder.push_text("\u00a0")
+
+    def fb2_build(self, builder):
+        """Emit three consecutive ``empty-line`` elements."""
+        for _ in range(3):
+            builder.simple_element('empty-line')
index ac01f5d..e5b0709 100644 (file)
@@ -14,4 +14,8 @@ class SeparatorLinia(WLElement):
     def txt_build_inner(self, builder):
         builder.push_text('-' * 48)
 
+    def fb2_build(self, builder):
+        """Emit an empty line, a paragraph of eight em dashes, and an empty line."""
+        builder.simple_element('empty-line')
+        builder.simple_element('p', '—' * 8)
+        builder.simple_element('empty-line')