New Element-based builder API (WiP).
author Radek Czajka <rczajka@rczajka.pl>
Mon, 31 Aug 2020 13:29:43 +0000 (15:29 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Mon, 31 Aug 2020 13:29:43 +0000 (15:29 +0200)
66 files changed:
setup.py
src/librarian/builders/__init__.py [new file with mode: 0644]
src/librarian/builders/html.py [new file with mode: 0644]
src/librarian/builders/sanitize.py [new file with mode: 0644]
src/librarian/builders/txt.py [new file with mode: 0644]
src/librarian/command_line.py [new file with mode: 0644]
src/librarian/dcparser.py
src/librarian/document.py [new file with mode: 0644]
src/librarian/elements/__init__.py [new file with mode: 0644]
src/librarian/elements/base.py [new file with mode: 0644]
src/librarian/elements/blocks/__init__.py [new file with mode: 0644]
src/librarian/elements/blocks/dlugi_cytat.py [new file with mode: 0644]
src/librarian/elements/blocks/nota.py [new file with mode: 0644]
src/librarian/elements/blocks/poezja_cyt.py [new file with mode: 0644]
src/librarian/elements/comments/__init__.py [new file with mode: 0644]
src/librarian/elements/comments/abstrakt.py [new file with mode: 0644]
src/librarian/elements/comments/uwaga.py [new file with mode: 0644]
src/librarian/elements/drama/__init__.py [new file with mode: 0644]
src/librarian/elements/drama/didask_tekst.py [new file with mode: 0644]
src/librarian/elements/drama/didaskalia.py [new file with mode: 0644]
src/librarian/elements/drama/kwestia.py [new file with mode: 0644]
src/librarian/elements/drama/lista_osob.py [new file with mode: 0644]
src/librarian/elements/drama/lista_osoba.py [new file with mode: 0644]
src/librarian/elements/drama/naglowek_listy.py [new file with mode: 0644]
src/librarian/elements/drama/naglowek_osoba.py [new file with mode: 0644]
src/librarian/elements/drama/osoba.py [new file with mode: 0644]
src/librarian/elements/figures/__init__.py [new file with mode: 0644]
src/librarian/elements/figures/ilustr.py [new file with mode: 0644]
src/librarian/elements/footnotes/__init__.py [new file with mode: 0644]
src/librarian/elements/front/__init__.py [new file with mode: 0644]
src/librarian/elements/front/autor_utworu.py [new file with mode: 0644]
src/librarian/elements/front/base.py [new file with mode: 0644]
src/librarian/elements/front/dzielo_nadrzedne.py [new file with mode: 0644]
src/librarian/elements/front/motto.py [new file with mode: 0644]
src/librarian/elements/front/motto_podpis.py [new file with mode: 0644]
src/librarian/elements/front/nazwa_utworu.py [new file with mode: 0644]
src/librarian/elements/front/podtytul.py [new file with mode: 0644]
src/librarian/elements/headers/__init__.py [new file with mode: 0644]
src/librarian/elements/headers/naglowek_czesc.py [new file with mode: 0644]
src/librarian/elements/headers/naglowek_podrozdzial.py [new file with mode: 0644]
src/librarian/elements/headers/naglowek_rozdzial.py [new file with mode: 0644]
src/librarian/elements/masters/__init__.py [new file with mode: 0644]
src/librarian/elements/paragraphs/__init__.py [new file with mode: 0644]
src/librarian/elements/paragraphs/akap.py [new file with mode: 0644]
src/librarian/elements/poetry/__init__.py [new file with mode: 0644]
src/librarian/elements/poetry/strofa.py [new file with mode: 0644]
src/librarian/elements/poetry/wers.py [new file with mode: 0644]
src/librarian/elements/poetry/wers_cd.py [new file with mode: 0644]
src/librarian/elements/poetry/wers_wciety.py [new file with mode: 0644]
src/librarian/elements/poetry/zastepnik_wersu.py [new file with mode: 0644]
src/librarian/elements/root/__init__.py [new file with mode: 0644]
src/librarian/elements/separators/__init__.py [new file with mode: 0644]
src/librarian/elements/separators/sekcja_asterysk.py [new file with mode: 0644]
src/librarian/elements/separators/sekcja_swiatlo.py [new file with mode: 0644]
src/librarian/elements/separators/separator_linia.py [new file with mode: 0644]
src/librarian/elements/styles/__init__.py [new file with mode: 0644]
src/librarian/elements/styles/slowo_obce.py [new file with mode: 0644]
src/librarian/elements/styles/tytul_dziela.py [new file with mode: 0644]
src/librarian/elements/styles/wyroznienie.py [new file with mode: 0644]
src/librarian/elements/themes/__init__.py [new file with mode: 0644]
src/librarian/elements/themes/begin.py [new file with mode: 0644]
src/librarian/elements/themes/end.py [new file with mode: 0644]
src/librarian/elements/themes/motyw.py [new file with mode: 0644]
src/librarian/parser.py
tests/test_text.py
tox.ini

index 0466e08..1ddf324 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,11 @@ setup(
         'texml',
         'ebooklib',
     ],
         'texml',
         'ebooklib',
     ],
+    entry_points = {
+        "console_scripts": [
+            "librarian=librarian.command_line:main"
+        ]
+    },
     scripts=['scripts/book2html',
              'scripts/book2txt',
              'scripts/book2epub',
     scripts=['scripts/book2html',
              'scripts/book2txt',
              'scripts/book2epub',
diff --git a/src/librarian/builders/__init__.py b/src/librarian/builders/__init__.py
new file mode 100644 (file)
index 0000000..fcd9194
--- /dev/null
@@ -0,0 +1,14 @@
+from .txt import TxtBuilder
+from .html import HtmlBuilder
+from .sanitize import Sanitizer
+
+
+# Registry of the available builder classes. Each class exposes an
+# `identifier` attribute, which get_builder_class() matches against.
+builders = [
+    TxtBuilder,
+    HtmlBuilder,
+    Sanitizer,
+]
+
+
+def get_builder_class(builder_id):
+    """Return the registered builder class whose `identifier` is builder_id.
+
+    Raises ValueError when no class in `builders` has that identifier
+    (instead of leaking a bare StopIteration from next()).
+    """
+    for builder_class in builders:
+        if builder_class.identifier == builder_id:
+            return builder_class
+    raise ValueError(
+        "Unknown builder: %r (available: %s)" % (
+            builder_id,
+            ", ".join(b.identifier for b in builders),
+        )
+    )
diff --git a/src/librarian/builders/html.py b/src/librarian/builders/html.py
new file mode 100644 (file)
index 0000000..8015c6a
--- /dev/null
@@ -0,0 +1,72 @@
+from lxml import etree
+from librarian import OutputFile
+
+
+class HtmlBuilder:
+    """Builds an HTML rendition of a document as a `div#book-text` fragment.
+
+    Output is appended at a "cursor": the element currently being filled.
+    One cursor is kept per named fragment (None for the main text, plus
+    'toc', 'themes' and 'header'). `current_cursors` is a stack of
+    fragment names; its top selects the active cursor.
+    """
+    file_extension = "html"
+    identifier = "html"
+
+    def __init__(self, image_location='https://wolnelektury.pl/media/book/pictures/marcos-historia-kolorow/'):
+        """Create the empty output skeleton.
+
+        image_location is stored on the builder; elements use it as
+        a prefix when building image `src` attributes.
+        """
+        self.image_location = image_location
+
+        # Only the #book-text fragment is built; there is no surrounding
+        # <html>/<body> wrapper.
+        self.tree = text = etree.Element('div', id='book-text')
+        toc = etree.SubElement(text, 'div', id='toc')
+        themes = etree.SubElement(text, 'div', id='themes')
+        h1 = etree.SubElement(text, 'h1')
+
+        self.cursors = {
+            None: text,
+            'toc': toc,
+            'themes': themes,
+            'header': h1,
+        }
+        self.current_cursors = [None]
+
+    def enter_fragment(self, fragment):
+        """Make the named fragment the target of subsequent output."""
+        self.current_cursors.append(fragment)
+
+    def exit_fragment(self):
+        """Return to the fragment that was active before enter_fragment()."""
+        self.current_cursors.pop()
+
+    def build(self, document):
+        """Build the document's root element and return the serialized HTML.
+
+        After the tree is built, a `head` element holding a stylesheet
+        link is inserted as the first child of the fragment. The result
+        is an OutputFile created from the UTF-8 encoded markup.
+        """
+        document.tree.getroot().html_build(self)
+
+        head = etree.Element('head')
+        self.tree.insert(0, head)
+        etree.SubElement(
+            head,
+            'link',
+            href="https://static.wolnelektury.pl/css/compressed/book_text.b15153e56c0a.css",
+            rel="stylesheet",
+            type="text/css",
+        )
+
+        return OutputFile.from_bytes(
+            etree.tostring(
+                self.tree,
+                method='html',
+                encoding='utf-8',
+                pretty_print=True
+            )
+        )
+
+    def start_element(self, tag, attrib):
+        """Open a new child element at the active cursor and descend into it."""
+        fragment = self.current_cursors[-1]
+        self.cursors[fragment] = etree.SubElement(
+            self.cursors[fragment],
+            tag,
+            **attrib
+        )
+
+    def end_element(self):
+        """Close the current element: move the active cursor to its parent."""
+        fragment = self.current_cursors[-1]
+        self.cursors[fragment] = self.cursors[fragment].getparent()
+
+    def push_text(self, text):
+        """Append text at the end of the element under the active cursor."""
+        cursor = self.cursors[self.current_cursors[-1]]
+        if len(cursor):
+            # Text following child elements lives in the tail of the last
+            # child, not in the tail of the cursor element itself.
+            last_child = cursor[-1]
+            last_child.tail = (last_child.tail or '') + text
+        else:
+            cursor.text = (cursor.text or '') + text
diff --git a/src/librarian/builders/sanitize.py b/src/librarian/builders/sanitize.py
new file mode 100644 (file)
index 0000000..4d7f7f9
--- /dev/null
@@ -0,0 +1,18 @@
+from lxml import etree
+from librarian import OutputFile
+
+
+class Sanitizer:
+    """Builder which runs sanitize() on the document tree and serializes it."""
+    identifier = 'sanitize'
+    file_extension = 'xml2'
+
+    def build(self, document):
+        """Sanitize the document's root element and return it as an OutputFile.
+
+        The XML is serialized as UTF-8 bytes.
+        """
+        # TODO: copy -- for now sanitize() is called on the document's own root.
+        root = document.tree.getroot()
+        root.sanitize()
+        serialized = etree.tostring(root, encoding='utf-8')
+        return OutputFile.from_bytes(serialized)
+
diff --git a/src/librarian/builders/txt.py b/src/librarian/builders/txt.py
new file mode 100644 (file)
index 0000000..4023814
--- /dev/null
@@ -0,0 +1,167 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import io
+from librarian import OutputFile, get_resource
+
+
+# Read the output template once, at import time. The encoding is given
+# explicitly so that reading does not depend on the locale.
+# NOTE(review): assumes the template file is UTF-8 encoded -- confirm.
+with io.open(get_resource("res/text/template.txt"), encoding='utf-8') as f:
+    TEMPLATE = f.read()
+
+
+class TxtFragment:
+    """Accumulates pieces of text output, tracking vertical margins.
+
+    Attributes:
+        pieces: list of output strings, to be joined by the caller.
+        current_margin: number of line breaks emitted since the last text.
+        starting_block: True until non-prepared text follows a margin;
+            while set, leading whitespace of pushed text is stripped.
+    """
+
+    def __init__(self):
+        self.pieces = []
+        self.current_margin = 0
+        self.starting_block = True
+
+    def _trim_last_piece(self):
+        # Drop trailing spaces before a line break is emitted.
+        if self.pieces:
+            self.pieces[-1] = self.pieces[-1].rstrip(' ')
+
+    def push_legacy_margin(self, margin):
+        """Unconditionally emit `margin` line breaks (margins add up)."""
+        if not margin:
+            return
+        self._trim_last_piece()
+        self.pieces.append('\r\n' * margin)
+        self.current_margin += margin
+        self.starting_block = True
+
+    def push_margin(self, margin):
+        """Make sure at least `margin` line breaks separate the blocks.
+
+        Only the line breaks missing from the current margin are emitted.
+        """
+        if not margin:
+            return
+        self._trim_last_piece()
+        missing = margin - self.current_margin
+        if missing <= 0:
+            return
+        self.pieces.append('\r\n' * missing)
+        self.current_margin = margin
+        self.starting_block = True
+
+    def push_text(self, text, prepared=False):
+        """Append text; empty text is ignored.
+
+        Prepared text is appended verbatim and does not end the
+        "starting block" state. Other text is left-stripped when it
+        starts a block.
+        """
+        if not text:
+            return
+        if prepared:
+            self.pieces.append(text)
+        else:
+            self.pieces.append(text.lstrip() if self.starting_block else text)
+            self.starting_block = False
+        self.current_margin = 0
+
+
+class TxtBuilder:
+    """Builds the plain-text rendition of a document.
+
+    Output is collected in two TxtFragment objects: 'header' and the main
+    text (None). `current_fragments` is a stack whose top receives all
+    pushed text and margins.
+    """
+    file_extension = "txt"
+    identifier = "txt"
+
+    default_license_description = {
+        "pol": (
+            "Ten utwór nie jest objęty majątkowym prawem autorskim "
+            "i znajduje się w domenie publicznej, co oznacza że "
+            "możesz go swobodnie wykorzystywać, publikować "
+            "i rozpowszechniać. Jeśli utwór opatrzony jest "
+            "dodatkowymi materiałami (przypisy, motywy literackie "
+            "etc.), które podlegają prawu autorskiemu, to te "
+            "dodatkowe materiały udostępnione są na licencji "
+            "Creative Commons Uznanie Autorstwa – Na Tych Samych "
+            "Warunkach 3.0 PL "
+            "(http://creativecommons.org/licenses/by-sa/3.0/)"
+        )
+    }
+    license_description = {
+        "pol": "Ten utwór jest udostępniony na licencji {meta.license_description}: \n{meta.license}",
+    }
+
+    def __init__(self):
+        self.fragments = {
+            None: TxtFragment(),
+            'header': TxtFragment()
+        }
+        self.current_fragments = [self.fragments[None]]
+
+    def enter_fragment(self, fragment):
+        """Make the named fragment the target of subsequent output."""
+        self.current_fragments.append(self.fragments[fragment])
+
+    def exit_fragment(self):
+        """Return to the fragment that was active before enter_fragment()."""
+        self.current_fragments.pop()
+
+    def push_text(self, text, prepared=False):
+        """Append text to the active fragment (see TxtFragment.push_text)."""
+        self.current_fragments[-1].push_text(text, prepared=prepared)
+
+    def push_margin(self, margin):
+        """Request a margin in the active fragment (see TxtFragment.push_margin)."""
+        self.current_fragments[-1].push_margin(margin)
+
+    def push_legacy_margin(self, margin, where=None):
+        """Emit line breaks in the active fragment; `where` is not used."""
+        self.current_fragments[-1].push_legacy_margin(margin)
+
+    def build(self, document, raw_text=False):
+        """Build the document and return it as an OutputFile.
+
+        With raw_text set, only the header and main text are output.
+        Otherwise they are wrapped in TEMPLATE together with license,
+        source, contributors, funders, publisher and ISBN information.
+        The result uses CRLF line endings and is encoded as UTF-8.
+        """
+        document.tree.getroot().txt_build(self)
+        meta = document.meta
+
+        self.enter_fragment('header')
+        if meta.translators:
+            # Pushed as prepared text: the block is still considered to
+            # be starting when the translators' names follow.
+            self.push_text("tłum. ", prepared=True)
+            # NOTE(review): names of multiple translators are pushed with
+            # no separator between them -- confirm this is intended.
+            for translator in meta.translators:
+                self.push_text(translator.readable())
+            #builder.push_margin(2)
+            self.push_legacy_margin(1)
+
+        if meta.isbn_txt:
+            #builder.push_margin(2)
+            self.push_legacy_margin(1)
+            isbn = meta.isbn_txt
+            # Drop an existing "ISBN" label; it is added again below.
+            if isbn.startswith(('ISBN-', 'ISBN ')):
+                isbn = isbn[5:]
+            self.push_text('ISBN {isbn}'.format(isbn=isbn))
+            #builder.push_margin(5)
+
+        #builder.push_margin(4)
+        self.push_legacy_margin(1)
+        self.exit_fragment()
+
+        text = ''.join(self.fragments['header'].pieces) + ''.join(self.fragments[None].pieces)
+
+        if raw_text:
+            result = text
+        else:
+            if meta.license:
+                license_description = self.license_description['pol'].format(meta=meta)
+            else:
+                license_description = self.default_license_description['pol']
+
+            if meta.source_name:
+                source = "\n\nTekst opracowany na podstawie: " + meta.source_name
+            else:
+                source = ''
+
+            # Unique, sorted, non-empty technical editors and editors.
+            contributors = ', '.join(
+                person.readable()
+                for person in sorted(set(
+                    p for p in (
+                        meta.technical_editors + meta.editors
+                    ) if p))
+            )
+            if contributors:
+                contributors = (
+                    "\n\nOpracowanie redakcyjne i przypisy: %s."
+                    % contributors
+                )
+
+            funders = ', '.join(meta.funders)
+            if funders:
+                funders = u"\n\nPublikację wsparli i wsparły: %s." % funders
+
+            isbn = getattr(meta, 'isbn_txt', None)
+            if isbn:
+                isbn = '\n\n' + isbn
+            else:
+                isbn = ''
+
+            result = TEMPLATE % {
+                "text": text,
+                "description": meta.description,
+                "url": meta.url,
+                "license_description": license_description,
+                "source": source,
+                "contributors": contributors,
+                "funders": funders,
+                "publisher":  '\n\nWydawca: ' + ', '.join(meta.publisher),
+                "isbn": isbn,
+            }
+
+        # Normalize all line endings to CRLF and end with a line break.
+        result = '\r\n'.join(result.splitlines()) + '\r\n'
+        return OutputFile.from_bytes(result.encode('utf-8'))
diff --git a/src/librarian/command_line.py b/src/librarian/command_line.py
new file mode 100644 (file)
index 0000000..91196f1
--- /dev/null
@@ -0,0 +1,43 @@
+import argparse
+import os.path
+from .builders import builders
+from .document import WLDocument
+
+
+def main(*args, **kwargs):
+    """Command-line entry point: build one output file from a document.
+
+    Command-line arguments are parsed by argparse; *args and **kwargs
+    are accepted but not used.
+
+    The output path is the one given with -o. Otherwise it is the input
+    path with its extension replaced by the builder's `file_extension`,
+    placed in the -O directory when one is given.
+    """
+    parser = argparse.ArgumentParser(description="PARSER DESCRIPTION")
+
+    parser.add_argument(
+        'builder',
+        choices=[b.identifier for b in builders],
+        help="Builder"
+    )
+    parser.add_argument('input_file')
+    parser.add_argument(
+        '-o', '--output-file', metavar='FILE',
+        help='specifies the output file'
+    )
+    parser.add_argument(
+        '-O', '--output-dir', metavar='DIR',
+        help='specifies the directory for output'
+    )
+
+    options = parser.parse_args()
+
+    if options.output_file:
+        output_file_path = options.output_file
+    else:
+        # `choices` above guarantees that exactly such a builder exists.
+        builder_class = next(
+            b for b in builders if b.identifier == options.builder
+        )
+        output_file_path = '.'.join((
+            os.path.splitext(options.input_file)[0],
+            builder_class.file_extension
+        ))
+        if options.output_dir:
+            output_file_path = os.path.join(
+                options.output_dir,
+                os.path.basename(output_file_path)
+            )
+
+    document = WLDocument(filename=options.input_file)
+    output = document.build(options.builder)
+    with open(output_file_path, 'wb') as f:
+        f.write(output.get_bytes())
index 432b580..2072695 100644 (file)
@@ -212,7 +212,7 @@ class Field(object):
                 % (self.uri, e.message)
             )
 
                 % (self.uri, e.message)
             )
 
-    def validate(self, fdict, fallbacks=None, strict=False):
+    def validate(self, fdict, fallbacks=None, strict=False, validate_required=True):
         if fallbacks is None:
             fallbacks = {}
         if self.uri not in fdict:
         if fallbacks is None:
             fallbacks = {}
         if self.uri not in fdict:
@@ -227,8 +227,10 @@ class Field(object):
                     f = [fallbacks[self.salias]]
                 else:
                     f = self.default
                     f = [fallbacks[self.salias]]
                 else:
                     f = self.default
-            else:
+            elif validate_required:
                 raise ValidationError("Required field %s not found" % self.uri)
                 raise ValidationError("Required field %s not found" % self.uri)
+            else:
+                return None
         else:
             f = fdict[self.uri]
 
         else:
             f = fdict[self.uri]
 
@@ -363,7 +365,7 @@ class WorkInfo(six.with_metaclass(DCInfo, object)):
 
         return cls(desc.attrib, field_dict, *args, **kwargs)
 
 
         return cls(desc.attrib, field_dict, *args, **kwargs)
 
-    def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
+    def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False, validate_required=True):
         """
         rdf_attrs should be a dictionary-like object with any attributes
         of the RDF:Description.
         """
         rdf_attrs should be a dictionary-like object with any attributes
         of the RDF:Description.
@@ -376,7 +378,7 @@ class WorkInfo(six.with_metaclass(DCInfo, object)):
 
         for field in self.FIELDS:
             value = field.validate(dc_fields, fallbacks=fallbacks,
 
         for field in self.FIELDS:
             value = field.validate(dc_fields, fallbacks=fallbacks,
-                                   strict=strict)
+                                   strict=strict, validate_required=validate_required)
             setattr(self, 'prop_' + field.name, value)
             self.fmap[field.name] = field
             if field.salias:
             setattr(self, 'prop_' + field.name, value)
             self.fmap[field.name] = field
             if field.salias:
diff --git a/src/librarian/document.py b/src/librarian/document.py
new file mode 100644 (file)
index 0000000..1bd249d
--- /dev/null
@@ -0,0 +1,24 @@
+from lxml import etree
+from .builders import get_builder_class
+from .parser import parser
+from . import dcparser
+
+
+class WLDocument:
+    """A parsed document: an element tree plus access to its metadata."""
+
+    def __init__(self, tree=None, filename=None):
+        """Wrap an element tree, or parse one from `filename` if given.
+
+        The root element gets a `document` attribute pointing back here.
+        `base_meta` is an empty BookInfo created without validation of
+        required fields.
+        """
+        if filename is not None:
+            tree = etree.parse(filename, parser=parser)
+        self.tree = tree
+        tree.getroot().document = self
+        self.base_meta = dcparser.BookInfo({}, {}, validate_required=False)
+
+    @property
+    def meta(self):
+        """Metadata of the root element."""
+        # TODO: Allow metadata of the master element as document meta
+        # (master = self.tree.getroot()[-1]).
+        return self.tree.getroot().meta
+
+    def build(self, builder_id, **kwargs):
+        """Build this document with a new builder of the given identifier.
+
+        Keyword arguments are passed on to the builder's build() method.
+        """
+        return get_builder_class(builder_id)().build(self, **kwargs)
+        
diff --git a/src/librarian/elements/__init__.py b/src/librarian/elements/__init__.py
new file mode 100644 (file)
index 0000000..512b4ef
--- /dev/null
@@ -0,0 +1,115 @@
+from lxml import etree
+from . import (blocks, comments, drama, figures, footnotes, front, headers,
+               masters, paragraphs, poetry, root, separators, styles, themes)
+
+
+# Mapping of XML tag names to the element classes implementing them.
+# Several tags may share one class.
+# NOTE(review): presumably consumed by the parser's element class
+# lookup -- confirm in parser.py.
+WL_ELEMENTS = {
+    # Tags handled as plain lxml elements, without WLElement behaviour.
+    'meta': etree.ElementBase,
+    'coverClass': etree.ElementBase,
+    "developmentStage": etree.ElementBase,
+    "coverBarColor": etree.ElementBase,
+    "coverBoxPosition": etree.ElementBase, 
+    "coverLogoUrl": etree.ElementBase,
+   
+    # Document root and master elements.
+    "utwor": root.Utwor,
+    "dramat_wierszowany_l": masters.Master,
+    "dramat_wierszowany_lp": masters.Master,
+    "dramat_wspolczesny": masters.Master,
+    "liryka_l": masters.Master,
+    "liryka_lp": masters.Master,
+    "opowiadanie": masters.Master,
+    "powiesc": masters.Master,
+
+    "autor_utworu": front.AutorUtworu,
+    "dzielo_nadrzedne": front.DzieloNadrzedne,
+    "nazwa_utworu": front.NazwaUtworu,
+    "podtytul": front.Podtytul,
+
+    "lista_osob": drama.ListaOsob,
+    "lista_osoba": drama.ListaOsoba,
+    "naglowek_osoba": drama.NaglowekOsoba,
+    "osoba": drama.Osoba,
+
+    "dlugi_cytat": blocks.DlugiCytat,
+    "poezja_cyt": blocks.PoezjaCyt,
+    # NOTE(review): "dlugi_cyt" looks like a variant spelling of
+    # "dlugi_cytat" -- confirm whether this alias is needed.
+    "dlugi_cyt": blocks.DlugiCytat,
+    
+    "slowo_obce": styles.SlowoObce,
+    "tytul_dziela": styles.TytulDziela,
+    "wyroznienie": styles.Wyroznienie,
+
+    "akap": paragraphs.Akap,
+    "akap_cd": paragraphs.Akap,
+    "akap_dialog": paragraphs.Akap,
+
+    "motto_podpis": front.MottoPodpis,
+
+    "strofa": poetry.Strofa,
+
+    "motto": front.Motto,
+
+    "didaskalia": drama.Didaskalia,
+    "kwestia": drama.Kwestia,
+    "didask_tekst": drama.DidaskTekst,
+
+    "dedykacja": paragraphs.Akap,
+    "miejsce_czas": paragraphs.Akap,
+
+    "uwaga": comments.Uwaga,
+
+    "wers": poetry.Wers,
+    "wers_wciety": poetry.WersWciety,
+    "wers_cd": poetry.WersCd,
+    "wers_akap": poetry.Wers,
+    "zastepnik_wersu": poetry.ZastepnikWersu,
+    "wers_do_prawej": poetry.Wers,
+    
+    # All footnote tags share one class.
+    "pa": footnotes.Footnote,
+    "pe": footnotes.Footnote,
+    "pr": footnotes.Footnote,
+    "pt": footnotes.Footnote,
+
+    "begin": themes.Begin,
+    "end": themes.End,
+    "motyw": themes.Motyw,
+
+    "nota": blocks.Nota,
+
+    "nota_red": comments.Abstrakt,
+    "extra": comments.Abstrakt,
+    "abstrakt": comments.Abstrakt,
+
+    "naglowek_czesc": headers.NaglowekCzesc,
+    "naglowek_akt": headers.NaglowekCzesc,
+    "naglowek_scena": headers.NaglowekRozdzial,
+    "naglowek_rozdzial": headers.NaglowekRozdzial,
+    "naglowek_podrozdzial": headers.NaglowekPodrozdzial,
+    "srodtytul": headers.NaglowekCzesc,
+
+    "naglowek_listy": drama.NaglowekListy,
+
+    "sekcja_asterysk": separators.SekcjaAsterysk,
+    "sekcja_swiatlo": separators.SekcjaSwiatlo,
+    "separator_linia": separators.SeparatorLinia,
+
+    "wieksze_odstepy": styles.Wyroznienie,
+    "mat": styles.Wyroznienie,
+    "www": styles.Wyroznienie,
+    "indeks_dolny": styles.Wyroznienie,
+
+    "tabela": paragraphs.Akap,
+    "tabelka": paragraphs.Akap,
+    "wiersz": paragraphs.Akap,
+    "kol": paragraphs.Akap,
+
+    "ilustr": figures.Ilustr,
+
+    # Tags sharing names with MathML elements; presumably the ones
+    # encountered in sklodowska-badanie-cial-radioaktywnych.xml.
+    "mrow": paragraphs.Akap,
+    "mi": paragraphs.Akap,
+    "mo": paragraphs.Akap,
+    "msup": paragraphs.Akap,
+    "mn": paragraphs.Akap,
+    "mfrac": paragraphs.Akap,
+    "mfenced": paragraphs.Akap,
+}
diff --git a/src/librarian/elements/base.py b/src/librarian/elements/base.py
new file mode 100644 (file)
index 0000000..fb5e3b1
--- /dev/null
@@ -0,0 +1,139 @@
+# -*- coding: utf-8
+
+import re
+from lxml import etree
+from librarian import dcparser, RDFNS
+
+
+class WLElement(etree.ElementBase):
+    """Base class of document elements which know how to build themselves.
+
+    Subclasses customize the output mostly through class attributes:
+
+    * TXT_TOP_MARGIN / TXT_BOTTOM_MARGIN: margins pushed to the txt
+      builder before and after the element; if TXT_LEGACY_TOP_MARGIN /
+      TXT_LEGACY_BOTTOM_MARGIN are defined, they are used instead.
+    * TXT_PREFIX / TXT_SUFFIX: text put around the element's content.
+    * HTML_TAG, HTML_ATTR, HTML_CLASS: the HTML element to open, if any.
+    * HTML_SECTION: when set, additional anchor elements are output.
+    * CAN_HAVE_TEXT: when False, the element's own text and the tails of
+      its children are skipped; only child elements are built.
+    * STRIP: when set, leading and trailing whitespace of the content
+      is stripped.
+    """
+    TXT_TOP_MARGIN = 0
+    TXT_BOTTOM_MARGIN = 0
+    TXT_PREFIX = ""
+    TXT_SUFFIX = ""
+
+    HTML_TAG = None
+    HTML_ATTR = {}
+    HTML_CLASS = None
+    HTML_SECTION = False
+
+    CAN_HAVE_TEXT = True
+    STRIP = False
+
+    # Applied in order, so longer patterns must precede their prefixes.
+    text_substitutions = [
+        (u'---', u'—'),
+        (u'--', u'–'),
+        (u'...', u'…'),
+        (u',,', u'„'),
+        (u'"', u'”'),
+        # Byte order mark; the u prefix keeps this a real U+FEFF
+        # character on Python 2 as well.
+        (u'\ufeff', u''),
+    ]
+
+    @property
+    def meta_object(self):
+        """BookInfo parsed from this element's RDF child, or None.
+
+        The result is cached on the instance after the first access.
+        """
+        if not hasattr(self, '_meta_object'):
+            elem = self.find(RDFNS('RDF'))
+            if elem is not None:
+                self._meta_object = dcparser.BookInfo.from_element(elem)
+            else:
+                self._meta_object = None
+        return self._meta_object
+
+    @property
+    def meta(self):
+        """Metadata applying to this element.
+
+        This is the element's own meta_object if there is one, else the
+        parent's meta; for a root element, the document's base_meta.
+        """
+        if self.meta_object is not None:
+            return self.meta_object
+        else:
+            if self.getparent() is not None:
+                return self.getparent().meta
+            else:
+                return self.document.base_meta
+
+    def normalize_text(self, text):
+        """Apply text_substitutions and collapse whitespace runs to one space.
+
+        None is treated as an empty string.
+        """
+        text = text or ''
+        for e, s in self.text_substitutions:
+            text = text.replace(e, s)
+        text = re.sub(r'\s+', ' ', text)
+        return text
+
+    def _build_inner(self, builder, build_method):
+        """Push the element's content: its text, children and their tails.
+
+        build_method is the name of the method called on each WLElement
+        child ('txt_build' or 'html_build').
+        """
+        child_count = len(self)
+        if self.CAN_HAVE_TEXT and self.text:
+            text = self.normalize_text(self.text)
+            if self.STRIP:
+                text = text.lstrip()
+                # With no children, this text also ends the element.
+                if not child_count:
+                    text = text.rstrip()
+            builder.push_text(text)
+        for i, child in enumerate(self):
+            # Children of other classes are not built, but the text
+            # following them still is.
+            if isinstance(child, WLElement):
+                getattr(child, build_method)(builder)
+            if self.CAN_HAVE_TEXT and child.tail:
+                text = self.normalize_text(child.tail)
+                if self.STRIP and i == child_count - 1:
+                    text = text.rstrip()
+                builder.push_text(text)
+
+    def _txt_build_inner(self, builder):
+        self._build_inner(builder, 'txt_build')
+
+    def txt_build(self, builder):
+        """Output the element to a txt builder: margins, prefix, content, suffix."""
+        if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
+            builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)
+        else:
+            builder.push_margin(self.TXT_TOP_MARGIN)
+        builder.push_text(self.TXT_PREFIX, True)
+        self._txt_build_inner(builder)
+        builder.push_text(self.TXT_SUFFIX, True)
+        if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
+            builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)
+        else:
+            builder.push_margin(self.TXT_BOTTOM_MARGIN)
+
+    def _html_build_inner(self, builder):
+        self._build_inner(builder, 'html_build')
+
+    def get_html_attr(self, builder):
+        """Return the attributes for the element's HTML tag.
+
+        These are a copy of HTML_ATTR, with `class` set from HTML_CLASS
+        and `id` copied from the source element when present.
+        """
+        attr = self.HTML_ATTR.copy()
+        if self.HTML_CLASS:
+            attr['class'] = self.HTML_CLASS
+        # always copy the id attribute (?)
+        if self.attrib.get('id'):
+            attr['id'] = self.attrib['id']
+        return attr
+
+    def html_build(self, builder):
+        """Output the element to an HTML builder.
+
+        The content is wrapped in HTML_TAG if one is set; otherwise only
+        the content itself is output.
+        """
+        if self.HTML_SECTION:
+            # NOTE(review): the anchor names and numbers used for
+            # sections are hard-coded placeholders (WiP).
+            builder.start_element(
+                'a', {"name": "f18", "class": "target"}
+            )
+            builder.push_text(" ")
+            builder.end_element()
+
+            builder.start_element(
+                "a", {"href": "#f18", "class": "anchor"}
+            )
+            builder.push_text("18")
+            builder.end_element()
+
+        if self.HTML_TAG:
+            builder.start_element(
+                self.HTML_TAG,
+                self.get_html_attr(builder),
+            )
+
+        if self.HTML_SECTION:
+            builder.start_element(
+                "a", {"name": "sec34"}
+            )
+            builder.end_element()
+
+        self._html_build_inner(builder)
+        if self.HTML_TAG:
+            builder.end_element()
+
+    def sanitize(self):
+        """Recursively sanitize all WLElement children."""
+        # TODO: Remove insanity here.
+        for e in self:
+            if isinstance(e, WLElement):
+                e.sanitize()
diff --git a/src/librarian/elements/blocks/__init__.py b/src/librarian/elements/blocks/__init__.py
new file mode 100644 (file)
index 0000000..075493a
--- /dev/null
@@ -0,0 +1,3 @@
+from .dlugi_cytat import DlugiCytat
+from .nota import Nota
+from .poezja_cyt import PoezjaCyt
diff --git a/src/librarian/elements/blocks/dlugi_cytat.py b/src/librarian/elements/blocks/dlugi_cytat.py
new file mode 100644 (file)
index 0000000..8137867
--- /dev/null
@@ -0,0 +1,10 @@
+from ..base import WLElement
+
+
+class DlugiCytat(WLElement):
+    """Long quotation block (`dlugi_cytat` and `dlugi_cyt` tags).
+
+    Its own text and the tails of its children are skipped in the output;
+    only child elements are built.
+    """
+    # The legacy margins, being defined, are the ones WLElement.txt_build uses.
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+    TXT_TOP_MARGIN = 3
+    TXT_BOTTOM_MARGIN = 2
+
+    CAN_HAVE_TEXT = False
diff --git a/src/librarian/elements/blocks/nota.py b/src/librarian/elements/blocks/nota.py
new file mode 100644 (file)
index 0000000..c38021d
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+# Element class for the `nota` tag.
+class Nota(WLElement):
+    # Own text and the tails of children are skipped in the output;
+    # only child elements are built.
+    CAN_HAVE_TEXT = False
diff --git a/src/librarian/elements/blocks/poezja_cyt.py b/src/librarian/elements/blocks/poezja_cyt.py
new file mode 100644 (file)
index 0000000..3349567
--- /dev/null
@@ -0,0 +1,10 @@
+from ..base import WLElement
+
+
+class PoezjaCyt(WLElement):
+    """Poetry quotation block (`poezja_cyt` tag).
+
+    Its own text and the tails of its children are skipped in the output;
+    only child elements are built.
+    """
+    # The legacy margins, being defined, are the ones WLElement.txt_build uses.
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+    TXT_TOP_MARGIN = 3
+    TXT_BOTTOM_MARGIN = 3
+
+    CAN_HAVE_TEXT = False
diff --git a/src/librarian/elements/comments/__init__.py b/src/librarian/elements/comments/__init__.py
new file mode 100644 (file)
index 0000000..9073a91
--- /dev/null
@@ -0,0 +1,2 @@
+from .abstrakt import Abstrakt
+from .uwaga import Uwaga
diff --git a/src/librarian/elements/comments/abstrakt.py b/src/librarian/elements/comments/abstrakt.py
new file mode 100644 (file)
index 0000000..9b43dc3
--- /dev/null
@@ -0,0 +1,9 @@
+from ..base import WLElement
+
+
+class Abstrakt(WLElement):
+    """Element left out of both the txt and the HTML output.
+
+    Used for the `abstrakt`, `nota_red` and `extra` tags.
+    """
+
+    def txt_build(self, builder):
+        """Output nothing."""
+
+    def html_build(self, builder):
+        """Output nothing."""
diff --git a/src/librarian/elements/comments/uwaga.py b/src/librarian/elements/comments/uwaga.py
new file mode 100644 (file)
index 0000000..5a5e26c
--- /dev/null
@@ -0,0 +1,7 @@
+from ..base import WLElement
+
+
+class Uwaga(WLElement):
+    """Element for the `uwaga` tag; left out of the txt output."""
+
+    def txt_build(self, builder):
+        """Output nothing."""
+
diff --git a/src/librarian/elements/drama/__init__.py b/src/librarian/elements/drama/__init__.py
new file mode 100644 (file)
index 0000000..1c88a6a
--- /dev/null
@@ -0,0 +1,8 @@
+from .didaskalia import Didaskalia
+from .didask_tekst import DidaskTekst
+from .kwestia import Kwestia
+from .lista_osoba import ListaOsoba
+from .lista_osob import ListaOsob
+from .naglowek_listy import NaglowekListy
+from .naglowek_osoba import NaglowekOsoba
+from .osoba import Osoba
diff --git a/src/librarian/elements/drama/didask_tekst.py b/src/librarian/elements/drama/didask_tekst.py
new file mode 100644 (file)
index 0000000..7b6ae54
--- /dev/null
@@ -0,0 +1,6 @@
+from ..base import WLElement
+
+
+# Element class for the `didask_tekst` tag.
+class DidaskTekst(WLElement):
+    # In the txt output, the content is enclosed in slashes.
+    TXT_PREFIX = "/ "
+    TXT_SUFFIX = " /"
diff --git a/src/librarian/elements/drama/didaskalia.py b/src/librarian/elements/drama/didaskalia.py
new file mode 100644 (file)
index 0000000..cdd7900
--- /dev/null
@@ -0,0 +1,11 @@
+from ..base import WLElement
+
+
+class Didaskalia(WLElement):
+    """Element for the `didaskalia` tag.
+
+    In the txt output, the content is enclosed in slashes.
+    """
+    # The legacy margins, being defined, are the ones WLElement.txt_build
+    # uses; the regular ones apply only once the legacy ones are removed.
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+    TXT_PREFIX = "/ "
+    TXT_SUFFIX = " /"
+
diff --git a/src/librarian/elements/drama/kwestia.py b/src/librarian/elements/drama/kwestia.py
new file mode 100644 (file)
index 0000000..0bb5f3d
--- /dev/null
@@ -0,0 +1,6 @@
+from ..base import WLElement
+
+
+# Element class for the `kwestia` tag.
+class Kwestia(WLElement):
+    # Own text and the tails of children are skipped in the output;
+    # only child elements are built.
+    CAN_HAVE_TEXT = False
+
diff --git a/src/librarian/elements/drama/lista_osob.py b/src/librarian/elements/drama/lista_osob.py
new file mode 100644 (file)
index 0000000..ec18472
--- /dev/null
@@ -0,0 +1,11 @@
+from ..base import WLElement
+
+
+class ListaOsob(WLElement):
+    """Element for the `lista_osob` tag.
+
+    Its own text and the tails of its children are skipped in the output;
+    only child elements are built.
+    """
+    # The legacy margins, being defined, are the ones WLElement.txt_build uses.
+    TXT_LEGACY_TOP_MARGIN = 3
+    TXT_LEGACY_BOTTOM_MARGIN = 1
+    TXT_TOP_MARGIN = 3
+    TXT_BOTTOM_MARGIN = 3
+
+    CAN_HAVE_TEXT = False
+
diff --git a/src/librarian/elements/drama/lista_osoba.py b/src/librarian/elements/drama/lista_osoba.py
new file mode 100644 (file)
index 0000000..5759c50
--- /dev/null
@@ -0,0 +1,10 @@
+from ..base import WLElement
+
+
+class ListaOsoba(WLElement):
+    """Element for the `lista_osoba` tag; a bulleted line in the txt output."""
+    TXT_PREFIX = " * "
+
+    # The legacy margins, being defined, are the ones WLElement.txt_build uses.
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+    TXT_TOP_MARGIN = 1
+    TXT_BOTTOM_MARGIN = 1
+
diff --git a/src/librarian/elements/drama/naglowek_listy.py b/src/librarian/elements/drama/naglowek_listy.py
new file mode 100644 (file)
index 0000000..398a055
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+class NaglowekListy(WLElement):
+    """Element for the `naglowek_listy` tag; uses the WLElement defaults."""
diff --git a/src/librarian/elements/drama/naglowek_osoba.py b/src/librarian/elements/drama/naglowek_osoba.py
new file mode 100644 (file)
index 0000000..076936b
--- /dev/null
@@ -0,0 +1,9 @@
+from ..base import WLElement
+
+
+class NaglowekOsoba(WLElement):
+    """Element class for a speaker header in a drama (``naglowek_osoba``)."""
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 3
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 3
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
diff --git a/src/librarian/elements/drama/osoba.py b/src/librarian/elements/drama/osoba.py
new file mode 100644 (file)
index 0000000..fea8d60
--- /dev/null
@@ -0,0 +1,6 @@
+from ..base import WLElement
+
+
+class Osoba(WLElement):
+    """Element class for a person/character mention (``osoba``).
+
+    Adds nothing to ``WLElement``; it exists so the tag has its own class.
+    """
+
diff --git a/src/librarian/elements/figures/__init__.py b/src/librarian/elements/figures/__init__.py
new file mode 100644 (file)
index 0000000..a0c464a
--- /dev/null
@@ -0,0 +1 @@
+from .ilustr import Ilustr
diff --git a/src/librarian/elements/figures/ilustr.py b/src/librarian/elements/figures/ilustr.py
new file mode 100644 (file)
index 0000000..bd51453
--- /dev/null
@@ -0,0 +1,10 @@
+from ..base import WLElement
+
+
+class Ilustr(WLElement):
+    """Element class for an illustration (``ilustr``), rendered as ``<img>``."""
+    HTML_TAG = 'img'
+
+    def get_html_attr(self, builder):
+        """Return the HTML attributes for the image tag.
+
+        The ``src`` is the builder's ``image_location`` followed by this
+        element's own ``src`` attribute; a missing ``src`` raises
+        ``KeyError``.
+        """
+        location = builder.image_location + self.attrib['src']
+        return {'src': location}
diff --git a/src/librarian/elements/footnotes/__init__.py b/src/librarian/elements/footnotes/__init__.py
new file mode 100644 (file)
index 0000000..eefe9db
--- /dev/null
@@ -0,0 +1,7 @@
+from ..base import WLElement
+
+
+class Footnote(WLElement):
+    """Base element class for footnotes."""
+
+    def txt_build(self, builder):
+        """Do nothing: footnotes contribute no content to the text build."""
+        return None
+
diff --git a/src/librarian/elements/front/__init__.py b/src/librarian/elements/front/__init__.py
new file mode 100644 (file)
index 0000000..44e7a99
--- /dev/null
@@ -0,0 +1,6 @@
+from .autor_utworu import AutorUtworu
+from .dzielo_nadrzedne import DzieloNadrzedne
+from .motto_podpis import MottoPodpis
+from .motto import Motto
+from .nazwa_utworu import NazwaUtworu
+from .podtytul import Podtytul
diff --git a/src/librarian/elements/front/autor_utworu.py b/src/librarian/elements/front/autor_utworu.py
new file mode 100644 (file)
index 0000000..fd6b2e8
--- /dev/null
@@ -0,0 +1,8 @@
+from .base import HeaderElement
+
+
+class AutorUtworu(HeaderElement):
+    """Header element for the author of the work (``autor_utworu``)."""
+    # Plain-text bottom margins (new and legacy layouts); read outside
+    # this module -- TODO confirm units.
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 2
+
+    # CSS class name; presumably applied to the generated HTML tag.
+    HTML_CLASS = 'author'
diff --git a/src/librarian/elements/front/base.py b/src/librarian/elements/front/base.py
new file mode 100644 (file)
index 0000000..9e961df
--- /dev/null
@@ -0,0 +1,15 @@
+from ..base import WLElement
+
+
+class HeaderElement(WLElement):
+    """Base class for front-matter elements built inside the 'header' fragment.
+
+    Both build methods wrap the inherited build in
+    ``builder.enter_fragment('header')`` / ``builder.exit_fragment()``.
+    """
+    HTML_TAG = 'span'
+
+    def txt_build(self, builder):
+        """Run the inherited text build with the 'header' fragment active."""
+        builder.enter_fragment('header')
+        try:
+            super(HeaderElement, self).txt_build(builder)
+        finally:
+            # Always leave the fragment, so a failing build cannot leave
+            # the builder pointing at 'header'.
+            builder.exit_fragment()
+
+    def html_build(self, builder):
+        """Run the inherited HTML build with the 'header' fragment active."""
+        builder.enter_fragment('header')
+        try:
+            super(HeaderElement, self).html_build(builder)
+        finally:
+            builder.exit_fragment()
diff --git a/src/librarian/elements/front/dzielo_nadrzedne.py b/src/librarian/elements/front/dzielo_nadrzedne.py
new file mode 100644 (file)
index 0000000..c53b3ad
--- /dev/null
@@ -0,0 +1,6 @@
+from .base import HeaderElement
+
+
+class DzieloNadrzedne(HeaderElement):
+    """Header element naming the parent work (``dzielo_nadrzedne``)."""
+    # Plain-text bottom margins (new and legacy layouts); read outside
+    # this module -- TODO confirm units.
+    TXT_BOTTOM_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 1
diff --git a/src/librarian/elements/front/motto.py b/src/librarian/elements/front/motto.py
new file mode 100644 (file)
index 0000000..fd81220
--- /dev/null
@@ -0,0 +1,6 @@
+from ..base import WLElement
+
+
+class Motto(WLElement):
+    """Element class for a motto (``motto``)."""
+    # Only the legacy text margins are overridden here; the non-legacy
+    # ones are inherited from ``WLElement``.
+    TXT_LEGACY_TOP_MARGIN = 4
+    TXT_LEGACY_BOTTOM_MARGIN = 2
diff --git a/src/librarian/elements/front/motto_podpis.py b/src/librarian/elements/front/motto_podpis.py
new file mode 100644 (file)
index 0000000..decbff3
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+class MottoPodpis(WLElement):
+    """Element class for a motto's signature/attribution (``motto_podpis``).
+
+    Adds nothing to ``WLElement``; it exists so the tag has its own class.
+    """
diff --git a/src/librarian/elements/front/nazwa_utworu.py b/src/librarian/elements/front/nazwa_utworu.py
new file mode 100644 (file)
index 0000000..55ef78b
--- /dev/null
@@ -0,0 +1,9 @@
+from .base import HeaderElement
+
+
+class NazwaUtworu(HeaderElement):
+    """Header element for the title of the work (``nazwa_utworu``)."""
+    # Plain-text bottom margins (new and legacy layouts); read outside
+    # this module -- TODO confirm units.
+    TXT_BOTTOM_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 1
+
+    # NOTE(review): HTML_TAG repeats the value already set on
+    # HeaderElement ('span'); kept as an explicit override.
+    HTML_TAG = 'span'
+    HTML_CLASS = 'title'
diff --git a/src/librarian/elements/front/podtytul.py b/src/librarian/elements/front/podtytul.py
new file mode 100644 (file)
index 0000000..4431bc2
--- /dev/null
@@ -0,0 +1,8 @@
+from .base import HeaderElement
+
+
+class Podtytul(HeaderElement):
+    """Header element for the subtitle of the work (``podtytul``)."""
+    # Plain-text bottom margins (new and legacy layouts); read outside
+    # this module -- TODO confirm units.
+    TXT_BOTTOM_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 1
+
+    # CSS class name; presumably applied to the generated HTML tag.
+    HTML_CLASS = 'subtitle'
diff --git a/src/librarian/elements/headers/__init__.py b/src/librarian/elements/headers/__init__.py
new file mode 100644 (file)
index 0000000..9ddf8d5
--- /dev/null
@@ -0,0 +1,3 @@
+from .naglowek_czesc import NaglowekCzesc
+from .naglowek_podrozdzial import NaglowekPodrozdzial
+from .naglowek_rozdzial import NaglowekRozdzial
diff --git a/src/librarian/elements/headers/naglowek_czesc.py b/src/librarian/elements/headers/naglowek_czesc.py
new file mode 100644 (file)
index 0000000..7b0781a
--- /dev/null
@@ -0,0 +1,8 @@
+from ..base import WLElement
+
+
+class NaglowekCzesc(WLElement):
+    """Element class for a part heading (``naglowek_czesc``)."""
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 5
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 5
+    TXT_LEGACY_BOTTOM_MARGIN = 0
diff --git a/src/librarian/elements/headers/naglowek_podrozdzial.py b/src/librarian/elements/headers/naglowek_podrozdzial.py
new file mode 100644 (file)
index 0000000..6d3f85f
--- /dev/null
@@ -0,0 +1,8 @@
+from ..base import WLElement
+
+
+class NaglowekPodrozdzial(WLElement):
+    """Element class for a sub-chapter heading (``naglowek_podrozdzial``)."""
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 3
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 3
+    TXT_LEGACY_BOTTOM_MARGIN = 0
diff --git a/src/librarian/elements/headers/naglowek_rozdzial.py b/src/librarian/elements/headers/naglowek_rozdzial.py
new file mode 100644 (file)
index 0000000..ded615f
--- /dev/null
@@ -0,0 +1,10 @@
+from ..base import WLElement
+
+
+class NaglowekRozdzial(WLElement):
+    """Element class for a chapter heading (``naglowek_rozdzial``)."""
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 4
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 4
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
+    # HTML tag used for this heading.
+    HTML_TAG = 'h3'
diff --git a/src/librarian/elements/masters/__init__.py b/src/librarian/elements/masters/__init__.py
new file mode 100644 (file)
index 0000000..2fdb6a6
--- /dev/null
@@ -0,0 +1,7 @@
+from ..base import WLElement
+
+
+class Master(WLElement):
+    """Element class for "master" elements.
+
+    ``Utwor.meta`` checks children of this type for a ``meta_object``.
+    """
+    # NOTE(review): presumably tells the base class to ignore direct text
+    # nodes of this container -- confirm in ``elements/base.py``.
+    CAN_HAVE_TEXT = False
+
+    # Only the legacy bottom margin is overridden here.
+    TXT_LEGACY_BOTTOM_MARGIN = 2
diff --git a/src/librarian/elements/paragraphs/__init__.py b/src/librarian/elements/paragraphs/__init__.py
new file mode 100644 (file)
index 0000000..d6c8438
--- /dev/null
@@ -0,0 +1 @@
+from .akap import Akap
diff --git a/src/librarian/elements/paragraphs/akap.py b/src/librarian/elements/paragraphs/akap.py
new file mode 100644 (file)
index 0000000..836671b
--- /dev/null
@@ -0,0 +1,15 @@
+from ..base import WLElement
+
+
+class Akap(WLElement):
+    """Element class for a paragraph (``akap``), rendered as ``<p class="paragraph">``."""
+    # NOTE(review): presumably makes the base class strip surrounding
+    # whitespace from the content -- confirm in ``elements/base.py``.
+    STRIP = True
+
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
+    HTML_TAG = 'p'
+    HTML_CLASS = 'paragraph'
+
+    # NOTE(review): meaning of HTML_SECTION is defined by the HTML builder,
+    # which is outside this chunk -- TODO confirm.
+    HTML_SECTION = True
diff --git a/src/librarian/elements/poetry/__init__.py b/src/librarian/elements/poetry/__init__.py
new file mode 100644 (file)
index 0000000..80fdc4f
--- /dev/null
@@ -0,0 +1,5 @@
+from .strofa import Strofa
+from .wers_cd import WersCd
+from .wers import Wers
+from .wers_wciety import WersWciety
+from .zastepnik_wersu import ZastepnikWersu
diff --git a/src/librarian/elements/poetry/strofa.py b/src/librarian/elements/poetry/strofa.py
new file mode 100644 (file)
index 0000000..2d3a4c9
--- /dev/null
@@ -0,0 +1,50 @@
+from copy import copy
+from ..base import WLElement
+from .wers import Wers
+
+
+class Strofa(WLElement):
+    """Element class for a stanza (``strofa``).
+
+    Verses inside a stanza may be written as explicit child elements or as
+    plain text separated by ``/``.  ``get_verses`` normalises both forms
+    into a sequence of verse elements, and ``_build_inner`` builds those
+    instead of the raw children.
+    """
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
+    def get_verses(self):
+        """Yield one element per verse of this stanza.
+
+        Text is split on ``/``; every piece becomes (part of) a synthetic
+        ``wers`` element.  Child elements are *copied* into the synthetic
+        verses, so calling this method does not modify the stanza and it
+        can be called repeatedly (e.g. once per output format).
+
+        A synthetic verse that ends up wrapping exactly one ``Wers``
+        child is replaced by that child.
+        """
+        # Imported lazily; presumably avoids a circular import, since the
+        # parser module imports the elements package.
+        from librarian.parser import parser
+
+        verses = [
+            parser.makeelement('wers')
+        ]
+        if self.text:
+            # Before any tags. These are text-only verses.
+            pieces = self.text.split('/')
+            for piece in pieces[:-1]:
+                verses[-1].text = piece
+                verses.append(parser.makeelement('wers'))
+            verses[-1].text = pieces[-1]
+
+        for child in self:
+            if child.tail:
+                # The tail may close the current verse and open new ones.
+                pieces = child.tail.split('/')
+                child_copy = copy(child)
+                child_copy.tail = pieces[0]
+                verses[-1].append(child_copy)
+
+                for piece in pieces[1:]:
+                    verses.append(parser.makeelement('wers'))
+                    verses[-1].text = piece
+
+            else:
+                # Append a copy: lxml's append() *moves* an element that
+                # already has a parent, which would strip the child from
+                # this stanza and make a second build see a different tree.
+                verses[-1].append(copy(child))
+
+        for verse in verses:
+            if len(verse) == 1 and isinstance(verse[0], Wers):
+                # An explicit verse element must be alone in its slot.
+                assert not (verse.text or '').strip()
+                assert not (verse[0].tail or '').strip()
+                yield verse[0]
+            else:
+                yield verse
+
+    def _build_inner(self, builder, build_method):
+        """Build every verse by calling its method named ``build_method``."""
+        for child in self.get_verses():
+            getattr(child, build_method)(builder)
diff --git a/src/librarian/elements/poetry/wers.py b/src/librarian/elements/poetry/wers.py
new file mode 100644 (file)
index 0000000..e164b1d
--- /dev/null
@@ -0,0 +1,13 @@
+from ..base import WLElement
+
+
+class Wers(WLElement):
+    """Element class for a single verse (``wers``), rendered as a ``<div>``."""
+    # NOTE(review): presumably makes the base class strip surrounding
+    # whitespace from the content -- confirm in ``elements/base.py``.
+    STRIP = True
+
+    # Plain-text margins, read outside this module; the LEGACY pair
+    # presumably reproduces the old text output -- TODO confirm units.
+    TXT_TOP_MARGIN = 1
+    TXT_BOTTOM_MARGIN = 1
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
+    HTML_TAG = 'div'
+    # NOTE(review): sibling classes set the CSS class via HTML_CLASS;
+    # verify that the base class actually reads HTML_ATTRIB.
+    HTML_ATTRIB = {"class": "verse"}
diff --git a/src/librarian/elements/poetry/wers_cd.py b/src/librarian/elements/poetry/wers_cd.py
new file mode 100644 (file)
index 0000000..7a14938
--- /dev/null
@@ -0,0 +1,6 @@
+from .wers import Wers
+
+class WersCd(Wers):
+    """Verse continuation (``wers_cd``): a verse with a fixed 24-space indent."""
+
+    def _txt_build_inner(self, builder):
+        """Push the indent, then build the verse content as ``Wers`` does."""
+        indent = ' ' * 24
+        builder.push_text(indent, prepared=True)
+        return super(WersCd, self)._txt_build_inner(builder) and None
diff --git a/src/librarian/elements/poetry/wers_wciety.py b/src/librarian/elements/poetry/wers_wciety.py
new file mode 100644 (file)
index 0000000..3e9bb6f
--- /dev/null
@@ -0,0 +1,16 @@
+from .wers import Wers
+
+
+class WersWciety(Wers):
+    """Indented verse (``wers_wciety``): a verse prefixed with extra spaces."""
+    @property
+    def typ(self):
+        """Return the indentation level used by ``_txt_build_inner``.
+
+        Currently 2 when a ``typ`` attribute is present (whatever its
+        value) and 1 otherwise.
+        """
+        ## Temporary legacy compatibility fix.
+        return 2 if 'typ' in self.attrib else 1
+
+        # NOTE(review): unreachable while the early return above is in
+        # place; apparently the intended final behaviour (``typ`` parsed
+        # as an integer, defaulting to 1).
+        v = self.attrib.get('typ')
+        return int(v) if v else 1
+
+    def _txt_build_inner(self, builder):
+        """Push two spaces per indentation level, then build the verse."""
+        builder.push_text('  ' * self.typ, prepared=True)
+        super(WersWciety, self)._txt_build_inner(builder)
+
diff --git a/src/librarian/elements/poetry/zastepnik_wersu.py b/src/librarian/elements/poetry/zastepnik_wersu.py
new file mode 100644 (file)
index 0000000..edee0f7
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+class ZastepnikWersu(WLElement):
+    """Element class for a verse placeholder (``zastepnik_wersu``).
+
+    Adds nothing to ``WLElement``; it exists so the tag has its own class.
+    """
diff --git a/src/librarian/elements/root/__init__.py b/src/librarian/elements/root/__init__.py
new file mode 100644 (file)
index 0000000..a8cf82d
--- /dev/null
@@ -0,0 +1,19 @@
+from ..base import WLElement
+from ..masters import Master
+
+
+class Utwor(WLElement):
+    """Element class for a work (``utwor``), the root of a document."""
+    CAN_HAVE_TEXT = False
+
+    @property
+    def meta(self):
+        """Return the metadata object that applies to this work.
+
+        Lookup order: this element's own ``meta_object``; then the first
+        ``Master`` child carrying one (deprecated layout); then the
+        parent's ``meta``.  Returns ``None`` when nothing is found.
+        """
+        own = self.meta_object
+        if own is not None:
+            return own
+
+        # Deprecated: allow RDF record in master.
+        for node in self:
+            if not isinstance(node, Master):
+                continue
+            from_master = node.meta_object
+            if from_master is not None:
+                return from_master
+
+        # This should not generally happen.
+        parent = self.getparent()
+        if parent is None:
+            return None
+        return parent.meta
diff --git a/src/librarian/elements/separators/__init__.py b/src/librarian/elements/separators/__init__.py
new file mode 100644 (file)
index 0000000..84e9784
--- /dev/null
@@ -0,0 +1,3 @@
+from .sekcja_asterysk import SekcjaAsterysk
+from .sekcja_swiatlo import SekcjaSwiatlo
+from .separator_linia import SeparatorLinia
diff --git a/src/librarian/elements/separators/sekcja_asterysk.py b/src/librarian/elements/separators/sekcja_asterysk.py
new file mode 100644 (file)
index 0000000..c11b9d0
--- /dev/null
@@ -0,0 +1,11 @@
+from ..base import WLElement
+
+
+class SekcjaAsterysk(WLElement):
+    """Element class for an asterisk section break (``sekcja_asterysk``)."""
+    # Plain-text margins (new layout, then legacy layout).
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 4
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 2
+
+    def _txt_build_inner(self, builder):
+        """Push a single asterisk as the element's text content."""
+        marker = '*'
+        builder.push_text(marker)
diff --git a/src/librarian/elements/separators/sekcja_swiatlo.py b/src/librarian/elements/separators/sekcja_swiatlo.py
new file mode 100644 (file)
index 0000000..1526548
--- /dev/null
@@ -0,0 +1,7 @@
+from ..base import WLElement
+
+
+class SekcjaSwiatlo(WLElement):
+    """Element class for a blank-space section break (``sekcja_swiatlo``)."""
+    # Only bottom margins are overridden (new and legacy layouts); read
+    # outside this module -- TODO confirm units.
+    TXT_BOTTOM_MARGIN = 6
+    TXT_LEGACY_BOTTOM_MARGIN = 4
+
diff --git a/src/librarian/elements/separators/separator_linia.py b/src/librarian/elements/separators/separator_linia.py
new file mode 100644 (file)
index 0000000..7587785
--- /dev/null
@@ -0,0 +1,11 @@
+from ..base import WLElement
+
+
+class SeparatorLinia(WLElement):
+    """Element class for a horizontal-line separator (``separator_linia``)."""
+    # Plain-text margins (new layout, then legacy layout).
+    TXT_TOP_MARGIN = 4
+    TXT_BOTTOM_MARGIN = 4
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 2
+
+    def _txt_build_inner(self, builder):
+        """Push a rule of 48 hyphens as the element's text content."""
+        rule = '-' * 48
+        builder.push_text(rule)
diff --git a/src/librarian/elements/styles/__init__.py b/src/librarian/elements/styles/__init__.py
new file mode 100644 (file)
index 0000000..40afc01
--- /dev/null
@@ -0,0 +1,3 @@
+from .slowo_obce import SlowoObce
+from .tytul_dziela import TytulDziela
+from .wyroznienie import Wyroznienie
diff --git a/src/librarian/elements/styles/slowo_obce.py b/src/librarian/elements/styles/slowo_obce.py
new file mode 100644 (file)
index 0000000..537f7c6
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+class SlowoObce(WLElement):
+    """Element class for a foreign word or phrase (``slowo_obce``).
+
+    Adds nothing to ``WLElement``; it exists so the tag has its own class.
+    """
diff --git a/src/librarian/elements/styles/tytul_dziela.py b/src/librarian/elements/styles/tytul_dziela.py
new file mode 100644 (file)
index 0000000..b6c3662
--- /dev/null
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from ..base import WLElement
+
+
+class TytulDziela(WLElement):
+    """Element class for the title of a cited work (``tytul_dziela``)."""
+
+    def normalize_text(self, text):
+        """Normalize ``text`` as the base class does, quoting it for ``typ="1"``.
+
+        When the element's ``typ`` attribute equals ``'1'`` the normalized
+        text is wrapped in Polish quotation marks.
+        """
+        txt = super(TytulDziela, self).normalize_text(text)
+        if self.attrib.get('typ') == '1':
+            # Explicit unicode literal: this module has no
+            # ``unicode_literals`` import, so on Python 2 a plain literal
+            # would be a byte string and formatting a non-ASCII title into
+            # it would raise UnicodeEncodeError.
+            txt = u'„{txt}”'.format(txt=txt)
+        return txt
diff --git a/src/librarian/elements/styles/wyroznienie.py b/src/librarian/elements/styles/wyroznienie.py
new file mode 100644 (file)
index 0000000..dce6936
--- /dev/null
@@ -0,0 +1,7 @@
+from ..base import WLElement
+
+
+class Wyroznienie(WLElement):
+    """Element class for emphasised text (``wyroznienie``)."""
+    # NOTE(review): presumably written before/after the content by the txt
+    # builder, giving *emphasis* -- confirm in ``elements/base.py``.
+    TXT_PREFIX = "*"
+    TXT_SUFFIX = "*"
+
diff --git a/src/librarian/elements/themes/__init__.py b/src/librarian/elements/themes/__init__.py
new file mode 100644 (file)
index 0000000..f2278e3
--- /dev/null
@@ -0,0 +1,3 @@
+from .begin import Begin
+from .end import End
+from .motyw import Motyw
diff --git a/src/librarian/elements/themes/begin.py b/src/librarian/elements/themes/begin.py
new file mode 100644 (file)
index 0000000..2d7ddc4
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+class Begin(WLElement):
+    """Element class for a ``begin`` marker (themes package).
+
+    Adds nothing to ``WLElement``; it exists so the tag has its own class.
+    """
diff --git a/src/librarian/elements/themes/end.py b/src/librarian/elements/themes/end.py
new file mode 100644 (file)
index 0000000..a6eb9e0
--- /dev/null
@@ -0,0 +1,5 @@
+from ..base import WLElement
+
+
+class End(WLElement):
+    """Element class for an ``end`` marker (themes package).
+
+    Adds nothing to ``WLElement``; it exists so the tag has its own class.
+    """
diff --git a/src/librarian/elements/themes/motyw.py b/src/librarian/elements/themes/motyw.py
new file mode 100644 (file)
index 0000000..51042c6
--- /dev/null
@@ -0,0 +1,14 @@
+from ..base import WLElement
+
+
+class Motyw(WLElement):
+    """Element class for a theme annotation (``motyw``).
+
+    The element's text is a comma-separated list of theme names.
+    """
+
+    def txt_build(self, builder):
+        """Do nothing: theme annotations contribute no text output."""
+        pass
+
+    def feed_to(self, builder):
+        """Register this element's themes with ``builder``.
+
+        Calls ``builder.set_themes(id, themes)`` with the element's ``id``
+        attribute and the list of normalized, non-empty theme names.
+        Raises ``KeyError`` when the ``id`` attribute is missing.
+        """
+        assert not len(self)
+        # ``normalize_text`` was referenced as a bare name that this
+        # module never imports or defines (NameError on every call); use
+        # the method inherited from WLElement instead.
+        # NOTE(review): confirm that the inherited normalisation is the
+        # one intended for theme names.
+        themes = [
+            self.normalize_text(name.strip())
+            # ``self.text`` is None for an empty element.
+            for name in (self.text or '').split(',')
+            if name.strip()
+        ]
+        builder.set_themes(self.attrib['id'], themes)
index 2bb9509..3ae081b 100644 (file)
@@ -19,7 +19,27 @@ import re
 import six
 
 
 import six
 
 
+from .elements import WL_ELEMENTS
+
+
+class WLElementLookup(etree.CustomElementClassLookup):
+    """lxml class lookup mapping un-namespaced tag names to WL element classes."""
+
+    def lookup(self, node_type, document, namespace, name):
+        """Return the element class for ``name``, or ``None`` for the default.
+
+        Only un-namespaced element nodes are mapped.  Comments, processing
+        instructions, namespaced elements and tags missing from
+        ``WL_ELEMENTS`` get ``None``, which makes lxml use its fallback
+        class instead of failing with ``KeyError`` inside the callback.
+        """
+        if node_type != 'element':
+            return None
+        if namespace:
+            return None
+        return WL_ELEMENTS.get(name)
+
+
+# Module-level XML parser whose elements are instantiated through
+# WLElementLookup, i.e. as the classes registered in WL_ELEMENTS.
+parser = etree.XMLParser()
+parser.set_element_class_lookup(
+    WLElementLookup()
+)
+
+
+
 class WLDocument(object):
 class WLDocument(object):
+    """Legacy class, to be replaced with documents.WLDocument."""
     LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
     provider = None
 
     LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
     provider = None
 
index 14c728f..bdd3ded 100644 (file)
@@ -6,25 +6,36 @@
 from __future__ import unicode_literals
 
 from librarian import NoDublinCore
 from __future__ import unicode_literals
 
 from librarian import NoDublinCore
-from librarian.parser import WLDocument
+from librarian.parser import WLDocument as LegacyWLDocument
+from librarian.document import WLDocument
 from nose.tools import *
 from .utils import get_fixture
 
 
 from nose.tools import *
 from .utils import get_fixture
 
 
-def test_transform():
+def test_transform_legacy():
     expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
 
     expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
 
-    text = WLDocument.from_file(
+    text = LegacyWLDocument.from_file(
             get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
         ).as_text().get_bytes()
 
     assert_equal(text, open(expected_output_file_path, 'rb').read())
 
 
             get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
         ).as_text().get_bytes()
 
     assert_equal(text, open(expected_output_file_path, 'rb').read())
 
 
+def test_transform():
+    """The new ``WLDocument(...).build('txt')`` API matches the text fixture."""
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
+
+    text = WLDocument(
+        filename=get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+    ).build('txt').get_bytes()
+
+    # Close the fixture deterministically instead of leaking the handle
+    # until garbage collection.
+    with open(expected_output_file_path, 'rb') as expected_file:
+        expected = expected_file.read()
+
+    assert_equal(text, expected)
+
+    
 def test_transform_raw():
     expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
 
 def test_transform_raw():
     expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
 
-    text = WLDocument.from_file(
+    text = LegacyWLDocument.from_file(
             get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
         ).as_text(flags=['raw-text']).get_bytes()
 
             get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
         ).as_text(flags=['raw-text']).get_bytes()
 
@@ -33,14 +44,14 @@ def test_transform_raw():
 
 @raises(NoDublinCore)
 def test_no_dublincore():
 
 @raises(NoDublinCore)
 def test_no_dublincore():
-    WLDocument.from_file(
+    LegacyWLDocument.from_file(
             get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
         ).as_text()
 
 
 def test_passing_parse_dublincore_to_transform():
     """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
             get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
         ).as_text()
 
 
 def test_passing_parse_dublincore_to_transform():
     """Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
-    WLDocument.from_file(
+    LegacyWLDocument.from_file(
             get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
             parse_dublincore=False,
         ).as_text()
             get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
             parse_dublincore=False,
         ).as_text()
diff --git a/tox.ini b/tox.ini
index bdce7bd..48c35e3 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -1,7 +1,7 @@
 [tox]
 envlist =
     clean,
 [tox]
 envlist =
     clean,
-    py{27,35,36,37},
+    py{27,35,36,37,38},
     stats
 
 [testenv]
     stats
 
 [testenv]