'texml',
'ebooklib',
],
+ entry_points = {
+ "console_scripts": [
+ "librarian=librarian.command_line:main"
+ ]
+ },
scripts=['scripts/book2html',
'scripts/book2txt',
'scripts/book2epub',
--- /dev/null
+from .txt import TxtBuilder
+from .html import HtmlBuilder
+from .sanitize import Sanitizer
+
+
+# Registry of available builder classes. get_builder_class() searches this
+# list by each class's ``identifier`` attribute.
+builders = [
+ TxtBuilder,
+ HtmlBuilder,
+ Sanitizer,
+]
+
+
+def get_builder_class(builder_id):
+    """Return the registered builder class whose ``identifier`` is ``builder_id``.
+
+    Raises:
+        ValueError: if no class in ``builders`` has that identifier.
+    """
+    for builder_class in builders:
+        if builder_class.identifier == builder_id:
+            return builder_class
+    # A bare next() on a generator would leak StopIteration to the caller,
+    # which is silently swallowed when raised inside another generator.
+    raise ValueError("Unknown builder: %r" % (builder_id,))
--- /dev/null
+from lxml import etree
+from librarian import OutputFile
+
+
+class HtmlBuilder:
+    """Builder that renders a document into an HTML ``div#book-text`` tree.
+
+    Output is written through named cursors (``None`` for the main text,
+    plus 'toc', 'themes' and 'header'); ``current_cursors`` is a stack of
+    cursor names and the one on top receives elements and text.
+    """
+    file_extension = "html"
+    identifier = "html"
+
+    def __init__(self, image_location='https://wolnelektury.pl/media/book/pictures/marcos-historia-kolorow/'):
+        """Create the output skeleton.
+
+        ``image_location`` is kept on the instance as the prefix for image
+        ``src`` attributes.
+        """
+        self.image_location = image_location
+
+        self.tree = text = etree.Element('div', **{'id': 'book-text'})
+        toc = etree.SubElement(text, 'div', id='toc')
+        themes = etree.SubElement(text, 'div', id='themes')
+        h1 = etree.SubElement(text, 'h1')
+
+        self.cursors = {
+            None: text,
+            'toc': toc,
+            'themes': themes,
+            'header': h1,
+        }
+        self.current_cursors = [None]
+
+    def enter_fragment(self, fragment):
+        """Make the cursor named ``fragment`` the current output target."""
+        self.current_cursors.append(fragment)
+
+    def exit_fragment(self):
+        """Return to the output target that was current before enter_fragment."""
+        self.current_cursors.pop()
+
+    def build(self, document):
+        """Render ``document`` and return the serialized HTML as an OutputFile."""
+        document.tree.getroot().html_build(self)
+
+        head = etree.Element('head')
+        self.tree.insert(0, head)
+        etree.SubElement(
+            head,
+            'link',
+            href="https://static.wolnelektury.pl/css/compressed/book_text.b15153e56c0a.css",
+            rel="stylesheet",
+            type="text/css",
+        )
+
+        return OutputFile.from_bytes(
+            etree.tostring(
+                self.tree,
+                method='html',
+                encoding='utf-8',
+                pretty_print=True
+            )
+        )
+
+    def start_element(self, tag, attrib):
+        """Open a new child element under the current cursor and descend into it."""
+        current = self.current_cursors[-1]
+        self.cursors[current] = etree.SubElement(
+            self.cursors[current],
+            tag,
+            **attrib
+        )
+
+    def end_element(self):
+        """Close the current element by moving the cursor to its parent."""
+        current = self.current_cursors[-1]
+        self.cursors[current] = self.cursors[current].getparent()
+
+    def push_text(self, text):
+        """Append ``text`` at the current cursor position."""
+        cursor = self.cursors[self.current_cursors[-1]]
+        if len(cursor):
+            # Text following child elements belongs to the last child's
+            # tail; writing it to the cursor's own tail would place it
+            # after the cursor element and discard any earlier tail text.
+            last_child = cursor[-1]
+            last_child.tail = (last_child.tail or '') + text
+        else:
+            cursor.text = (cursor.text or '') + text
--- /dev/null
+from lxml import etree
+from librarian import OutputFile
+
+
+class Sanitizer:
+    """Builder that sanitizes the document tree and serializes it."""
+    file_extension = 'xml2'
+    identifier = 'sanitize'
+
+    def build(self, document):
+        """Run ``sanitize()`` on the document root and return it as an OutputFile.
+
+        The root is sanitized in place; no copy of the tree is made.
+        """
+        root = document.tree.getroot()  # TODO: copy
+        root.sanitize()
+        serialized = etree.tostring(root, encoding='utf-8')
+        return OutputFile.from_bytes(serialized)
+
--- /dev/null
+# coding: utf-8
+from __future__ import unicode_literals
+
+import io
+from librarian import OutputFile, get_resource
+
+
+# Load the text template once at import time. The encoding is passed
+# explicitly so decoding does not depend on the process locale.
+# NOTE(review): assumes the template file is stored as UTF-8, matching the
+# UTF-8 encoding used for the builder's output - confirm.
+with io.open(get_resource("res/text/template.txt"), encoding="utf-8") as f:
+    TEMPLATE = f.read()
+
+
+class TxtFragment:
+ """Accumulates pieces of plain-text output together with margin state."""
+ def __init__(self):
+ # Output chunks, in order; TxtBuilder.build joins them with ''.
+ self.pieces = []
+ # Size of the margin emitted since the last text; push_text resets it.
+ self.current_margin = 0
+ # While true, push_text left-strips text that is not marked prepared.
+ self.starting_block = True
+
+ def push_legacy_margin(self, margin):
+ """Emit ``margin`` CRLF line breaks, adding them to the current margin.
+
+ A falsy ``margin`` emits nothing. Trailing spaces of the last piece
+ are stripped before the line breaks are appended.
+ """
+ if margin:
+ if self.pieces:
+ self.pieces[-1] = self.pieces[-1].rstrip(' ')
+ self.pieces.append('\r\n' * margin)
+ self.current_margin += margin
+ self.starting_block = True
+
+ def push_margin(self, margin):
+ """Raise the current margin to ``margin`` line breaks.
+
+ Only the difference is emitted, and only when ``margin`` exceeds the
+ margin already present, so consecutive margins do not accumulate.
+ """
+ if margin:
+ if self.pieces:
+ self.pieces[-1] = self.pieces[-1].rstrip(' ')
+ if margin > self.current_margin:
+ self.pieces.append('\r\n' * (margin - self.current_margin))
+ self.current_margin = margin
+ self.starting_block = True
+
+ def push_text(self, text, prepared=False):
+ """Append ``text``; empty text is ignored.
+
+ Text that is not ``prepared`` is left-stripped when it starts a block
+ and clears the starting-block flag.
+ """
+ if text:
+ if self.starting_block and not prepared:
+ text = text.lstrip()
+ self.pieces.append(text)
+ self.current_margin = 0
+ if not prepared:
+ self.starting_block = False
+
+
+class TxtBuilder:
+ """
+ Builder producing plain-text output from a document.
+
+ Text is collected in TxtFragment objects: a default one stored under the
+ key None and a 'header' one. build() emits the header pieces first,
+ followed by the default pieces.
+ """
+ file_extension = "txt"
+ identifier = "txt"
+
+ # Licence note used by build() when meta.license is falsy.
+ default_license_description = {
+ "pol": (
+ "Ten utwór nie jest objęty majątkowym prawem autorskim "
+ "i znajduje się w domenie publicznej, co oznacza że "
+ "możesz go swobodnie wykorzystywać, publikować "
+ "i rozpowszechniać. Jeśli utwór opatrzony jest "
+ "dodatkowymi materiałami (przypisy, motywy literackie "
+ "etc.), które podlegają prawu autorskiemu, to te "
+ "dodatkowe materiały udostępnione są na licencji "
+ "Creative Commons Uznanie Autorstwa – Na Tych Samych "
+ "Warunkach 3.0 PL "
+ "(http://creativecommons.org/licenses/by-sa/3.0/)"
+ )
+ }
+ # Licence note template used when meta.license is set; formatted with
+ # ``meta`` as a keyword argument.
+ license_description = {
+ "pol": "Ten utwór jest udostępniony na licencji {meta.license_description}: \n{meta.license}",
+ }
+
+ def __init__(self):
+ self.fragments = {
+ None: TxtFragment(),
+ 'header': TxtFragment()
+ }
+ # Stack of active fragments; the last entry receives pushed output.
+ self.current_fragments = [self.fragments[None]]
+
+ def enter_fragment(self, fragment):
+ """Redirect output to the fragment stored under key ``fragment``."""
+ self.current_fragments.append(self.fragments[fragment])
+
+ def exit_fragment(self):
+ """Restore the fragment that was active before enter_fragment."""
+ self.current_fragments.pop()
+
+ def push_text(self, text, prepared=False):
+ """Forward ``text`` to the active fragment."""
+ self.current_fragments[-1].push_text(text, prepared=prepared)
+
+ def push_margin(self, margin):
+ """Forward a margin request to the active fragment."""
+ self.current_fragments[-1].push_margin(margin)
+
+ def push_legacy_margin(self, margin, where=None):
+ """Forward a legacy margin request to the active fragment.
+
+ ``where`` is accepted but not used.
+ """
+ self.current_fragments[-1].push_legacy_margin(margin)
+
+ def build(self, document, raw_text=False):
+ """Render ``document`` as text and return it as a UTF-8 OutputFile.
+
+ With ``raw_text`` the rendered text is used directly; otherwise it is
+ inserted into TEMPLATE together with licence, source, contributor,
+ funder, publisher and ISBN information taken from ``document.meta``.
+ """
+ document.tree.getroot().txt_build(self)
+ meta = document.meta
+
+ # Translator and ISBN lines are appended to the header fragment.
+ self.enter_fragment('header')
+ if meta.translators:
+ # NOTE(review): 'header' is passed positionally as ``prepared``; it is
+ # truthy, so this behaves like prepared=True - confirm that is intended.
+ self.push_text("tłum. ", 'header')
+ # Each translator is pushed as its own piece; no separator is added here.
+ for translator in meta.translators:
+ self.push_text(translator.readable())
+ #builder.push_margin(2)
+ self.push_legacy_margin(1)
+
+ if meta.isbn_txt:
+ #builder.push_margin(2)
+ self.push_legacy_margin(1)
+ isbn = meta.isbn_txt
+ # Drop a leading 'ISBN-' / 'ISBN ' (5 characters) before re-prefixing.
+ if isbn.startswith(('ISBN-' , 'ISBN ')):
+ isbn = isbn[5:]
+ self.push_text('ISBN {isbn}'.format(isbn=isbn))
+ #builder.push_margin(5)
+
+ #builder.push_margin(4)
+ self.push_legacy_margin(1)
+ self.exit_fragment()
+
+ text = ''.join(self.fragments['header'].pieces) + ''.join(self.fragments[None].pieces)
+
+ if raw_text:
+ result = text
+ else:
+ if meta.license:
+ license_description = self.license_description['pol'].format(meta=meta)
+ else:
+ license_description = self.default_license_description['pol']
+
+ if meta.source_name:
+ source = "\n\nTekst opracowany na podstawie: " + meta.source_name
+ else:
+ source = ''
+
+ # Unique, sorted, non-empty technical editors and editors.
+ contributors = ', '.join(
+ person.readable()
+ for person in sorted(set(
+ p for p in (
+ meta.technical_editors + meta.editors
+ ) if p))
+ )
+ if contributors:
+ contributors = (
+ "\n\nOpracowanie redakcyjne i przypisy: %s."
+ % contributors
+ )
+
+ funders = ', '.join(meta.funders)
+ if funders:
+ funders = u"\n\nPublikację wsparli i wsparły: %s." % funders
+
+ # NOTE(review): isbn_txt is read with getattr() here but accessed
+ # directly above - confirm whether the attribute can be missing.
+ isbn = getattr(meta, 'isbn_txt', None)
+ if isbn:
+ isbn = '\n\n' + isbn
+ else:
+ isbn = ''
+
+ result = TEMPLATE % {
+ "text": text,
+ "description": meta.description,
+ "url": meta.url,
+ "license_description": license_description,
+ "source": source,
+ "contributors": contributors,
+ "funders": funders,
+ "publisher": '\n\nWydawca: ' + ', '.join(meta.publisher),
+ "isbn": isbn,
+ }
+
+ # Normalize every line ending to CRLF and end with a final CRLF.
+ result = '\r\n'.join(result.splitlines()) + '\r\n'
+ return OutputFile.from_bytes(result.encode('utf-8'))
--- /dev/null
+import argparse
+import os.path
+from .builders import builders
+from .document import WLDocument
+
+
+def main(*args, **kwargs):
+    """Command-line entry point: build one input file with the chosen builder.
+
+    Options are read from ``sys.argv`` by argparse; the positional and
+    keyword arguments of this function are accepted but not used.
+
+    The output path is ``--output-file`` when given. Otherwise it is the
+    input path with its extension replaced by the builder's
+    ``file_extension``, placed in ``--output-dir`` when that is given.
+    """
+    parser = argparse.ArgumentParser(description="PARSER DESCRIPTION")
+
+    parser.add_argument(
+        'builder',
+        choices=[b.identifier for b in builders],
+        help="Builder"
+    )
+    parser.add_argument('input_file')
+    parser.add_argument(
+        '-o', '--output-file', metavar='FILE',
+        help='specifies the output file'
+    )
+    parser.add_argument(
+        '-O', '--output-dir', metavar='DIR',
+        help='specifies the directory for output'
+    )
+
+    options = parser.parse_args()
+
+    if options.output_file:
+        output_file_path = options.output_file
+    else:
+        # ``builder`` used to be referenced without ever being defined, so
+        # omitting -o raised NameError. argparse ``choices`` guarantees that
+        # a builder with this identifier exists.
+        builder = next(
+            b for b in builders if b.identifier == options.builder
+        )
+        output_file_path = '.'.join((
+            os.path.splitext(options.input_file)[0],
+            builder.file_extension
+        ))
+        # NOTE(review): applied only to the derived file name; the original
+        # nesting of this branch is not recoverable from the diff - confirm.
+        if options.output_dir:
+            output_file_path = os.path.join(
+                options.output_dir,
+                os.path.basename(output_file_path)
+            )
+
+    document = WLDocument(filename=options.input_file)
+    output = document.build(options.builder)
+    with open(output_file_path, 'wb') as f:
+        f.write(output.get_bytes())
% (self.uri, e.message)
)
- def validate(self, fdict, fallbacks=None, strict=False):
+ def validate(self, fdict, fallbacks=None, strict=False, validate_required=True):
if fallbacks is None:
fallbacks = {}
if self.uri not in fdict:
f = [fallbacks[self.salias]]
else:
f = self.default
- else:
+ elif validate_required:
raise ValidationError("Required field %s not found" % self.uri)
+ else:
+ return None
else:
f = fdict[self.uri]
return cls(desc.attrib, field_dict, *args, **kwargs)
- def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False):
+ def __init__(self, rdf_attrs, dc_fields, fallbacks=None, strict=False, validate_required=True):
"""
rdf_attrs should be a dictionary-like object with any attributes
of the RDF:Description.
for field in self.FIELDS:
value = field.validate(dc_fields, fallbacks=fallbacks,
- strict=strict)
+ strict=strict, validate_required=validate_required)
setattr(self, 'prop_' + field.name, value)
self.fmap[field.name] = field
if field.salias:
--- /dev/null
+from lxml import etree
+from .builders import get_builder_class
+from .parser import parser
+from . import dcparser
+
+
+class WLDocument:
+    """A parsed document that can be rendered by any registered builder."""
+
+    def __init__(self, tree=None, filename=None):
+        """Wrap an already parsed ``tree`` or parse ``filename``.
+
+        When ``filename`` is given it is parsed with the package parser and
+        takes precedence over ``tree``.
+
+        Raises:
+            ValueError: if neither ``tree`` nor ``filename`` is given.
+        """
+        if filename is not None:
+            tree = etree.parse(filename, parser=parser)
+        if tree is None:
+            raise ValueError("Either tree or filename is required.")
+        self.tree = tree
+        # Back-reference that lets elements reach ``base_meta``.
+        tree.getroot().document = self
+        # Empty metadata record, created without required-field validation.
+        self.base_meta = dcparser.BookInfo({}, {}, validate_required=False)
+
+    @property
+    def meta(self):
+        """Metadata of the document, as reported by the root element."""
+        return self.tree.getroot().meta
+
+    def build(self, builder_id, **kwargs):
+        """Render the document with the builder registered as ``builder_id``.
+
+        Extra keyword arguments are passed to the builder's ``build``.
+        """
+        return get_builder_class(builder_id)().build(self, **kwargs)
+
--- /dev/null
+from lxml import etree
+from . import (blocks, comments, drama, figures, footnotes, front, headers,
+ masters, paragraphs, poetry, root, separators, styles, themes)
+
+
+# Maps an XML tag name to the element class used for it.
+# Entries mapped to etree.ElementBase get no WLElement behaviour.
+WL_ELEMENTS = {
+ 'meta': etree.ElementBase,
+ 'coverClass': etree.ElementBase,
+ "developmentStage": etree.ElementBase,
+ "coverBarColor": etree.ElementBase,
+ "coverBoxPosition": etree.ElementBase,
+ "coverLogoUrl": etree.ElementBase,
+
+ "utwor": root.Utwor,
+ "dramat_wierszowany_l": masters.Master,
+ "dramat_wierszowany_lp": masters.Master,
+ "dramat_wspolczesny": masters.Master,
+ "liryka_l": masters.Master,
+ "liryka_lp": masters.Master,
+ "opowiadanie": masters.Master,
+ "powiesc": masters.Master,
+
+ "autor_utworu": front.AutorUtworu,
+ "dzielo_nadrzedne": front.DzieloNadrzedne,
+ "nazwa_utworu": front.NazwaUtworu,
+ "podtytul": front.Podtytul,
+
+ "lista_osob": drama.ListaOsob,
+ "lista_osoba": drama.ListaOsoba,
+ "naglowek_osoba": drama.NaglowekOsoba,
+ "osoba": drama.Osoba,
+
+ "dlugi_cytat": blocks.DlugiCytat,
+ "poezja_cyt": blocks.PoezjaCyt,
+ "dlugi_cyt": blocks.DlugiCytat, # NOTE(review): same class as "dlugi_cytat" - alias or misspelled tag? (was marked "???")
+
+ "slowo_obce": styles.SlowoObce,
+ "tytul_dziela": styles.TytulDziela,
+ "wyroznienie": styles.Wyroznienie,
+
+ "akap": paragraphs.Akap,
+ "akap_cd": paragraphs.Akap,
+ "akap_dialog": paragraphs.Akap,
+
+ "motto_podpis": front.MottoPodpis,
+
+ "strofa": poetry.Strofa,
+
+ "motto": front.Motto,
+
+ "didaskalia": drama.Didaskalia,
+ "kwestia": drama.Kwestia,
+ "didask_tekst": drama.DidaskTekst,
+
+ "dedykacja": paragraphs.Akap,
+ "miejsce_czas": paragraphs.Akap,
+
+ "uwaga": comments.Uwaga,
+
+ "wers": poetry.Wers,
+ "wers_wciety": poetry.WersWciety,
+ "wers_cd": poetry.WersCd,
+ "wers_akap": poetry.Wers,
+ "zastepnik_wersu": poetry.ZastepnikWersu,
+ "wers_do_prawej": poetry.Wers,
+
+ "pa": footnotes.Footnote,
+ "pe": footnotes.Footnote,
+ "pr": footnotes.Footnote,
+ "pt": footnotes.Footnote,
+
+ "begin": themes.Begin,
+ "end": themes.End,
+ "motyw": themes.Motyw,
+
+ "nota": blocks.Nota,
+
+ "nota_red": comments.Abstrakt,
+ "extra": comments.Abstrakt,
+ "abstrakt": comments.Abstrakt,
+
+ "naglowek_czesc": headers.NaglowekCzesc,
+ "naglowek_akt": headers.NaglowekCzesc,
+ "naglowek_scena": headers.NaglowekRozdzial,
+ "naglowek_rozdzial": headers.NaglowekRozdzial,
+ "naglowek_podrozdzial": headers.NaglowekPodrozdzial,
+ "srodtytul": headers.NaglowekCzesc,
+
+ "naglowek_listy": drama.NaglowekListy,
+
+ "sekcja_asterysk": separators.SekcjaAsterysk,
+ "sekcja_swiatlo": separators.SekcjaSwiatlo,
+ "separator_linia": separators.SeparatorLinia,
+
+ "wieksze_odstepy": styles.Wyroznienie,
+ "mat": styles.Wyroznienie,
+ "www": styles.Wyroznienie,
+ "indeks_dolny": styles.Wyroznienie,
+
+ "tabela": paragraphs.Akap,
+ "tabelka": paragraphs.Akap,
+ "wiersz": paragraphs.Akap,
+ "kol": paragraphs.Akap,
+
+ "ilustr": figures.Ilustr,
+
+# Presumably tags needed for sklodowska-badanie-cial-radioaktywnych.xml - confirm.
+ "mrow": paragraphs.Akap,
+ "mi": paragraphs.Akap,
+ "mo": paragraphs.Akap,
+ "msup": paragraphs.Akap,
+ "mn": paragraphs.Akap,
+ "mfrac": paragraphs.Akap,
+ "mfenced": paragraphs.Akap,
+}
--- /dev/null
+# -*- coding: utf-8
+
+import re
+from lxml import etree
+from librarian import dcparser, RDFNS
+
+
+class WLElement(etree.ElementBase):
+ """Base class of document elements that know how to render themselves.
+
+ Subclasses tune rendering through the class attributes below. The
+ TXT_LEGACY_TOP_MARGIN / TXT_LEGACY_BOTTOM_MARGIN attributes are optional:
+ when a class defines them, txt_build uses them instead of the
+ corresponding TXT_*_MARGIN value.
+ """
+ TXT_TOP_MARGIN = 0
+ TXT_BOTTOM_MARGIN = 0
+ TXT_PREFIX = ""
+ TXT_SUFFIX = ""
+
+ HTML_TAG = None
+ HTML_ATTR = {}
+ HTML_CLASS = None
+ HTML_SECTION = False
+
+ # When false, the element's own text and its children's tails are skipped.
+ CAN_HAVE_TEXT = True
+ # When true, leading and trailing whitespace of the content is stripped.
+ STRIP = False
+
+ # Applied in order by normalize_text, so '---' must precede '--'.
+ text_substitutions = [
+ (u'---', u'—'),
+ (u'--', u'–'),
+ (u'...', u'…'),
+ (u',,', u'„'),
+ (u'"', u'”'),
+ # The u prefix is required: this module does not import
+ # unicode_literals, so on Python 2 a plain '\ufeff' is the literal
+ # six-character sequence and the byte order mark is never removed.
+ (u'\ufeff', ''),
+ ]
+
+ @property
+ def meta_object(self):
+ """BookInfo parsed from this element's own RDF child, or None.
+
+ The result is cached on the element after the first access.
+ """
+ if not hasattr(self, '_meta_object'):
+ elem = self.find(RDFNS('RDF'))
+ if elem is not None:
+ self._meta_object = dcparser.BookInfo.from_element(elem)
+ else:
+ self._meta_object = None
+ return self._meta_object
+
+ @property
+ def meta(self):
+ """Effective metadata: own record, else the parent's, else the document's base_meta."""
+ if self.meta_object is not None:
+ return self.meta_object
+ else:
+ if self.getparent() is not None:
+ return self.getparent().meta
+ else:
+ return self.document.base_meta
+
+ def normalize_text(self, text):
+ """Apply text_substitutions and collapse whitespace runs to one space."""
+ text = text or ''
+ for e, s in self.text_substitutions:
+ text = text.replace(e, s)
+ text = re.sub(r'\s+', ' ', text)
+ return text
+
+ def _build_inner(self, builder, build_method):
+ """Push this element's text and build its children via ``build_method``."""
+ child_count = len(self)
+ if self.CAN_HAVE_TEXT and self.text:
+ text = self.normalize_text(self.text)
+ if self.STRIP:
+ text = text.lstrip()
+ if not child_count:
+ text = text.rstrip()
+ builder.push_text(text)
+ for i, child in enumerate(self):
+ if isinstance(child, WLElement):
+ getattr(child, build_method)(builder)
+ if self.CAN_HAVE_TEXT and child.tail:
+ text = self.normalize_text(child.tail)
+ if self.STRIP and i == child_count - 1:
+ text = text.rstrip()
+ builder.push_text(text)
+
+ def _txt_build_inner(self, builder):
+ self._build_inner(builder, 'txt_build')
+
+ def txt_build(self, builder):
+ """Emit top margin, prefix, content, suffix and bottom margin as text."""
+ if hasattr(self, 'TXT_LEGACY_TOP_MARGIN'):
+ builder.push_legacy_margin(self.TXT_LEGACY_TOP_MARGIN)
+ else:
+ builder.push_margin(self.TXT_TOP_MARGIN)
+ builder.push_text(self.TXT_PREFIX, True)
+ self._txt_build_inner(builder)
+ builder.push_text(self.TXT_SUFFIX, True)
+ if hasattr(self, 'TXT_LEGACY_BOTTOM_MARGIN'):
+ builder.push_legacy_margin(self.TXT_LEGACY_BOTTOM_MARGIN)
+ else:
+ builder.push_margin(self.TXT_BOTTOM_MARGIN)
+
+ def _html_build_inner(self, builder):
+ self._build_inner(builder, 'html_build')
+
+ def get_html_attr(self, builder):
+ """Return the attributes for the HTML element: HTML_ATTR plus class and id."""
+ attr = self.HTML_ATTR.copy()
+ if self.HTML_CLASS:
+ attr['class'] = self.HTML_CLASS
+ # always copy the id attribute (?)
+ if self.attrib.get('id'):
+ attr['id'] = self.attrib['id']
+ return attr
+
+ def html_build(self, builder):
+ """Emit this element as HTML, wrapped in HTML_TAG when one is set."""
+ # NOTE(review): the anchor names and number below ("f18", "18", "sec34")
+ # are hard-coded literals, so every HTML_SECTION element emits identical
+ # anchors - this looks like placeholder output; confirm.
+ if self.HTML_SECTION:
+ builder.start_element(
+ 'a', {"name": "f18", "class": "target"}
+ )
+ builder.push_text(" ")
+ builder.end_element()
+
+ builder.start_element(
+ "a", {"href": "#f18", "class": "anchor"}
+ )
+ builder.push_text("18")
+ builder.end_element()
+
+
+ if self.HTML_TAG:
+ builder.start_element(
+ self.HTML_TAG,
+ self.get_html_attr(builder),
+ )
+
+ if self.HTML_SECTION:
+ builder.start_element(
+ "a", {"name": "sec34"}
+ )
+ builder.end_element()
+
+ self._html_build_inner(builder)
+ if self.HTML_TAG:
+ builder.end_element()
+
+ def sanitize(self):
+ """Recursively sanitize child WLElements; this base version changes nothing itself."""
+ # TODO: Remove insanity here.
+ for e in self:
+ if isinstance(e, WLElement):
+ e.sanitize()
--- /dev/null
+from .dlugi_cytat import DlugiCytat
+from .nota import Nota
+from .poezja_cyt import PoezjaCyt
--- /dev/null
+from ..base import WLElement
+
+
+class DlugiCytat(WLElement):
+ """Element class registered in WL_ELEMENTS for "dlugi_cytat" and "dlugi_cyt"."""
+ # Own text and children's tails are skipped; only child elements are built.
+ CAN_HAVE_TEXT = False
+
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 3
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_TOP_MARGIN = 1
+ TXT_LEGACY_BOTTOM_MARGIN = 0
--- /dev/null
+from ..base import WLElement
+
+
+class Nota(WLElement):
+ """Element class registered in WL_ELEMENTS for "nota"."""
+ # Own text and children's tails are skipped; only child elements are built.
+ CAN_HAVE_TEXT = False
--- /dev/null
+from ..base import WLElement
+
+
+class PoezjaCyt(WLElement):
+ """Element class registered in WL_ELEMENTS for "poezja_cyt"."""
+ # Own text and children's tails are skipped; only child elements are built.
+ CAN_HAVE_TEXT = False
+
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 3
+ TXT_BOTTOM_MARGIN = 3
+ TXT_LEGACY_TOP_MARGIN = 1
+ TXT_LEGACY_BOTTOM_MARGIN = 0
--- /dev/null
+from .abstrakt import Abstrakt
+from .uwaga import Uwaga
--- /dev/null
+from ..base import WLElement
+
+
+class Abstrakt(WLElement):
+    """Element that is left out of both text and HTML output."""
+
+    def txt_build(self, builder):
+        """Emit nothing: the element and its content are skipped in text."""
+        return None
+
+    def html_build(self, builder):
+        """Emit nothing: the element and its content are skipped in HTML."""
+        return None
--- /dev/null
+from ..base import WLElement
+
+
+class Uwaga(WLElement):
+    """Element that is left out of text output."""
+
+    def txt_build(self, builder):
+        """Emit nothing: the element and its content are skipped in text."""
+        return None
+
--- /dev/null
+from .didaskalia import Didaskalia
+from .didask_tekst import DidaskTekst
+from .kwestia import Kwestia
+from .lista_osoba import ListaOsoba
+from .lista_osob import ListaOsob
+from .naglowek_listy import NaglowekListy
+from .naglowek_osoba import NaglowekOsoba
+from .osoba import Osoba
--- /dev/null
+from ..base import WLElement
+
+
+class DidaskTekst(WLElement):
+ """Element class registered in WL_ELEMENTS for "didask_tekst"."""
+ # In text output the content is wrapped as "/ ... /".
+ TXT_PREFIX = "/ "
+ TXT_SUFFIX = " /"
--- /dev/null
+from ..base import WLElement
+
+
+class Didaskalia(WLElement):
+    """Element class registered in WL_ELEMENTS for "didaskalia".
+
+    In text output the content is wrapped as "/ ... /".
+    """
+    # Was misspelled TXT_TOP_PARGIN, a name nothing reads; the intended
+    # attribute is the TXT_TOP_MARGIN looked up by WLElement.txt_build.
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 2
+    # WLElement.txt_build uses the legacy margins whenever they are defined.
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+    TXT_PREFIX = "/ "
+    TXT_SUFFIX = " /"
+
--- /dev/null
+from ..base import WLElement
+
+
+class Kwestia(WLElement):
+ """Element class registered in WL_ELEMENTS for "kwestia"."""
+ # Own text and children's tails are skipped; only child elements are built.
+ CAN_HAVE_TEXT = False
+
--- /dev/null
+from ..base import WLElement
+
+
+class ListaOsob(WLElement):
+ """Element class registered in WL_ELEMENTS for "lista_osob"."""
+ # Own text and children's tails are skipped; only child elements are built.
+ CAN_HAVE_TEXT = False
+
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 3
+ TXT_BOTTOM_MARGIN = 3
+ TXT_LEGACY_TOP_MARGIN = 3
+ TXT_LEGACY_BOTTOM_MARGIN = 1
+
--- /dev/null
+from ..base import WLElement
+
+
+class ListaOsoba(WLElement):
+ """Element class registered in WL_ELEMENTS for "lista_osoba"."""
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 1
+ TXT_BOTTOM_MARGIN = 1
+ TXT_LEGACY_TOP_MARGIN = 1
+ TXT_LEGACY_BOTTOM_MARGIN = 0
+ # Each entry is rendered in text with a " * " bullet prefix.
+ TXT_PREFIX = " * "
+
--- /dev/null
+from ..base import WLElement
+
+
+class NaglowekListy(WLElement):
+ """Element class registered in WL_ELEMENTS for "naglowek_listy"; uses WLElement defaults."""
+ pass
--- /dev/null
+from ..base import WLElement
+
+
+class NaglowekOsoba(WLElement):
+ """Element class registered in WL_ELEMENTS for "naglowek_osoba"."""
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 3
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_TOP_MARGIN = 3
+ TXT_LEGACY_BOTTOM_MARGIN = 0
+
--- /dev/null
+from ..base import WLElement
+
+
+class Osoba(WLElement):
+ """Element class registered in WL_ELEMENTS for "osoba"; uses WLElement defaults."""
+ pass
+
--- /dev/null
+from .ilustr import Ilustr
--- /dev/null
+from ..base import WLElement
+
+
+class Ilustr(WLElement):
+    """Illustration element, rendered in HTML as an ``img`` tag."""
+    HTML_TAG = 'img'
+
+    def get_html_attr(self, builder):
+        """Return only a ``src`` attribute: the builder's image location plus this element's ``src``."""
+        image_url = builder.image_location + self.attrib['src']
+        return {'src': image_url}
--- /dev/null
+from ..base import WLElement
+
+
+class Footnote(WLElement):
+    """Footnote element; left out of text output."""
+
+    def txt_build(self, builder):
+        """Emit nothing: the element and its content are skipped in text."""
+        return None
+
--- /dev/null
+from .autor_utworu import AutorUtworu
+from .dzielo_nadrzedne import DzieloNadrzedne
+from .motto_podpis import MottoPodpis
+from .motto import Motto
+from .nazwa_utworu import NazwaUtworu
+from .podtytul import Podtytul
--- /dev/null
+from .base import HeaderElement
+
+
+class AutorUtworu(HeaderElement):
+ """Element class registered in WL_ELEMENTS for "autor_utworu"."""
+ # WLElement.txt_build uses the legacy bottom margin because it is defined.
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_BOTTOM_MARGIN = 2
+
+ HTML_CLASS = 'author'
--- /dev/null
+from ..base import WLElement
+
+
+class HeaderElement(WLElement):
+    """Base for elements whose output goes to the builder's 'header' fragment."""
+    HTML_TAG = 'span'
+
+    def txt_build(self, builder):
+        """Build the element as text inside the 'header' fragment."""
+        builder.enter_fragment('header')
+        try:
+            super(HeaderElement, self).txt_build(builder)
+        finally:
+            # Always restore the previous fragment, even if building raises,
+            # so the builder's fragment stack stays balanced.
+            builder.exit_fragment()
+
+    def html_build(self, builder):
+        """Build the element as HTML inside the 'header' fragment."""
+        builder.enter_fragment('header')
+        try:
+            super(HeaderElement, self).html_build(builder)
+        finally:
+            builder.exit_fragment()
--- /dev/null
+from .base import HeaderElement
+
+
+class DzieloNadrzedne(HeaderElement):
+ """Element class registered in WL_ELEMENTS for "dzielo_nadrzedne"."""
+ # WLElement.txt_build uses the legacy bottom margin because it is defined.
+ TXT_BOTTOM_MARGIN = 1
+ TXT_LEGACY_BOTTOM_MARGIN = 1
--- /dev/null
+from ..base import WLElement
+
+
+class Motto(WLElement):
+ """Element class registered in WL_ELEMENTS for "motto"."""
+ # Only legacy margins are defined; WLElement.txt_build uses them.
+ TXT_LEGACY_TOP_MARGIN = 4
+ TXT_LEGACY_BOTTOM_MARGIN = 2
--- /dev/null
+from ..base import WLElement
+
+
+class MottoPodpis(WLElement):
+ """Element class registered in WL_ELEMENTS for "motto_podpis"; uses WLElement defaults."""
+ pass
--- /dev/null
+from .base import HeaderElement
+
+
+class NazwaUtworu(HeaderElement):
+ """Element class registered in WL_ELEMENTS for "nazwa_utworu"."""
+ # WLElement.txt_build uses the legacy bottom margin because it is defined.
+ TXT_BOTTOM_MARGIN = 1
+ TXT_LEGACY_BOTTOM_MARGIN = 1
+
+ # Same value as the HTML_TAG already set on HeaderElement.
+ HTML_TAG = 'span'
+ HTML_CLASS = 'title'
--- /dev/null
+from .base import HeaderElement
+
+
+class Podtytul(HeaderElement):
+ """Element class registered in WL_ELEMENTS for "podtytul"."""
+ # WLElement.txt_build uses the legacy bottom margin because it is defined.
+ TXT_BOTTOM_MARGIN = 1
+ TXT_LEGACY_BOTTOM_MARGIN = 1
+
+ HTML_CLASS = 'subtitle'
--- /dev/null
+from .naglowek_czesc import NaglowekCzesc
+from .naglowek_podrozdzial import NaglowekPodrozdzial
+from .naglowek_rozdzial import NaglowekRozdzial
--- /dev/null
+from ..base import WLElement
+
+
+class NaglowekCzesc(WLElement):
+ """Element class registered in WL_ELEMENTS for "naglowek_czesc", "naglowek_akt" and "srodtytul"."""
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 5
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_TOP_MARGIN = 5
+ TXT_LEGACY_BOTTOM_MARGIN = 0
--- /dev/null
+from ..base import WLElement
+
+
+class NaglowekPodrozdzial(WLElement):
+ """Element class registered in WL_ELEMENTS for "naglowek_podrozdzial"."""
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 3
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_TOP_MARGIN = 3
+ TXT_LEGACY_BOTTOM_MARGIN = 0
--- /dev/null
+from ..base import WLElement
+
+
+class NaglowekRozdzial(WLElement):
+ """Element class registered in WL_ELEMENTS for "naglowek_rozdzial" and "naglowek_scena"."""
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 4
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_TOP_MARGIN = 4
+ TXT_LEGACY_BOTTOM_MARGIN = 0
+
+ HTML_TAG = 'h3'
--- /dev/null
+from ..base import WLElement
+
+
+class Master(WLElement):
+ """Element class that WL_ELEMENTS maps the document-type tags to (e.g. "powiesc", "liryka_l")."""
+ # Own text and children's tails are skipped; only child elements are built.
+ CAN_HAVE_TEXT = False
+
+ # Only the bottom legacy margin is defined; the top margin falls back to
+ # WLElement.TXT_TOP_MARGIN.
+ TXT_LEGACY_BOTTOM_MARGIN = 2
--- /dev/null
+from .akap import Akap
--- /dev/null
+from ..base import WLElement
+
+
+class Akap(WLElement):
+ """Paragraph element; WL_ELEMENTS also maps several other tags to it."""
+ # Leading and trailing whitespace of the content is stripped.
+ STRIP = True
+
+ # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+ # defined, so the two non-legacy values are not consulted by it.
+ TXT_TOP_MARGIN = 2
+ TXT_BOTTOM_MARGIN = 2
+ TXT_LEGACY_TOP_MARGIN = 2
+ TXT_LEGACY_BOTTOM_MARGIN = 0
+
+ HTML_TAG = 'p'
+ HTML_CLASS = 'paragraph'
+
+ # Makes WLElement.html_build emit its section anchors for this element.
+ HTML_SECTION = True
--- /dev/null
+from .strofa import Strofa
+from .wers_cd import WersCd
+from .wers import Wers
+from .wers_wciety import WersWciety
+from .zastepnik_wersu import ZastepnikWersu
--- /dev/null
+from copy import copy
+from ..base import WLElement
+from .wers import Wers
+
+
+class Strofa(WLElement):
+    """Stanza element; its content is split into verses on '/' separators."""
+    # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+    # defined, so the two non-legacy values are not consulted by it.
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 2
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
+    def get_verses(self):
+        """Yield one element per verse of this stanza.
+
+        The stanza's text and its children's tails are split on '/'; every
+        piece starts a new synthetic ``wers`` element. A synthetic verse
+        that only wraps a single Wers child yields that child instead.
+        The stanza itself is left unmodified: children are copied into the
+        synthetic verses.
+        """
+        from librarian.parser import parser
+
+        verses = [
+            parser.makeelement('wers')
+        ]
+        if self.text:
+            # Before any tags. These are text-only verses.
+            pieces = self.text.split('/')
+            for piece in pieces[:-1]:
+                verses[-1].text = piece
+                verses.append(parser.makeelement('wers'))
+            verses[-1].text = pieces[-1]
+
+        for child in self:
+            # Appending an lxml element to another parent moves it, so a
+            # copy is used in both branches. Previously a child without
+            # tail was appended directly and thereby removed from the
+            # stanza, breaking any later build of the same document.
+            child_copy = copy(child)
+            if child.tail:
+                pieces = child.tail.split('/')
+                child_copy.tail = pieces[0]
+                verses[-1].append(child_copy)
+
+                for piece in pieces[1:]:
+                    verses.append(parser.makeelement('wers'))
+                    verses[-1].text = piece
+            else:
+                verses[-1].append(child_copy)
+
+        for verse in verses:
+            if len(verse) == 1 and isinstance(verse[0], Wers):
+                assert not (verse.text or '').strip()
+                assert not (verse[0].tail or '').strip()
+                yield verse[0]
+            else:
+                yield verse
+
+    def _build_inner(self, builder, build_method):
+        """Build each verse from get_verses() with ``build_method``."""
+        for child in self.get_verses():
+            getattr(child, build_method)(builder)
--- /dev/null
+from ..base import WLElement
+
+
+class Wers(WLElement):
+    """Verse element, rendered in HTML as ``<div class="verse">``."""
+    # Leading and trailing whitespace of the content is stripped.
+    STRIP = True
+
+    # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+    # defined, so the two non-legacy values are not consulted by it.
+    TXT_TOP_MARGIN = 1
+    TXT_BOTTOM_MARGIN = 1
+    TXT_LEGACY_TOP_MARGIN = 1
+    TXT_LEGACY_BOTTOM_MARGIN = 0
+
+    HTML_TAG = 'div'
+    # Was named HTML_ATTRIB, which WLElement.get_html_attr never reads, so
+    # the class attribute was silently missing from the HTML output.
+    HTML_ATTR = {"class": "verse"}
--- /dev/null
+from .wers import Wers
+
+class WersCd(Wers):
+    """Verse variant whose text output starts with a fixed 24-space indent."""
+
+    def _txt_build_inner(self, builder):
+        """Push the indent as prepared text, then build the verse content."""
+        indent = ' ' * 24
+        builder.push_text(indent, prepared=True)
+        super(WersCd, self)._txt_build_inner(builder)
--- /dev/null
+from .wers import Wers
+
+
+class WersWciety(Wers):
+    """Indented verse: text output is prefixed with ``typ`` spaces."""
+
+    @property
+    def typ(self):
+        """Indentation width: 2 when a ``typ`` attribute is present, else 1."""
+        # Temporary legacy compatibility fix: the attribute's value is
+        # ignored, only its presence counts.
+        # TODO: once legacy compatibility is dropped, return the integer
+        # value of the attribute (default 1). The statements doing that sat
+        # unreachable after this return and have been removed.
+        return 2 if 'typ' in self.attrib else 1
+
+    def _txt_build_inner(self, builder):
+        """Push the indent as prepared text, then build the verse content."""
+        builder.push_text(' ' * self.typ, prepared=True)
+        super(WersWciety, self)._txt_build_inner(builder)
+
--- /dev/null
+from ..base import WLElement
+
+
+class ZastepnikWersu(WLElement):
+ """Element class registered in WL_ELEMENTS for "zastepnik_wersu"; uses WLElement defaults."""
+ pass
--- /dev/null
+from ..base import WLElement
+from ..masters import Master
+
+
+class Utwor(WLElement):
+    """Root element of a document."""
+    # Own text and children's tails are skipped; only child elements are built.
+    CAN_HAVE_TEXT = False
+
+    @property
+    def meta(self):
+        """Effective metadata of the work.
+
+        Lookup order: this element's own RDF record, an RDF record inside a
+        Master child, the parent element's metadata, and finally the owning
+        document's ``base_meta`` when a document is attached.
+        """
+        if self.meta_object is not None:
+            return self.meta_object
+
+        # Deprecated: allow RDF record in master.
+        for c in self:
+            if isinstance(c, Master) and c.meta_object is not None:
+                return c.meta_object
+
+        # This should not generally happen.
+        if self.getparent() is not None:
+            return self.getparent().meta
+
+        # Same last resort as WLElement.meta. Without it this property
+        # returned None for a root without an RDF record.
+        document = getattr(self, 'document', None)
+        if document is not None:
+            return document.base_meta
+        return None
--- /dev/null
+from .sekcja_asterysk import SekcjaAsterysk
+from .sekcja_swiatlo import SekcjaSwiatlo
+from .separator_linia import SeparatorLinia
--- /dev/null
+from ..base import WLElement
+
+
+class SekcjaAsterysk(WLElement):
+    """Section separator rendered in text as a single asterisk."""
+    # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+    # defined, so the two non-legacy values are not consulted by it.
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 2
+    TXT_TOP_MARGIN = 2
+    TXT_BOTTOM_MARGIN = 4
+
+    def _txt_build_inner(self, builder):
+        """Push the asterisk instead of building any element content."""
+        marker = '*'
+        builder.push_text(marker)
--- /dev/null
+from ..base import WLElement
+
+
+class SekcjaSwiatlo(WLElement):
+ """Element class registered in WL_ELEMENTS for "sekcja_swiatlo"."""
+ # WLElement.txt_build uses the legacy bottom margin because it is defined.
+ TXT_BOTTOM_MARGIN = 6
+ TXT_LEGACY_BOTTOM_MARGIN = 4
+
--- /dev/null
+from ..base import WLElement
+
+
+class SeparatorLinia(WLElement):
+    """Separator rendered in text as a line of 48 hyphens."""
+    # WLElement.txt_build uses the TXT_LEGACY_* margins whenever they are
+    # defined, so the two non-legacy values are not consulted by it.
+    TXT_LEGACY_TOP_MARGIN = 2
+    TXT_LEGACY_BOTTOM_MARGIN = 2
+    TXT_TOP_MARGIN = 4
+    TXT_BOTTOM_MARGIN = 4
+
+    def _txt_build_inner(self, builder):
+        """Push the hyphen line instead of building any element content."""
+        rule = '-' * 48
+        builder.push_text(rule)
--- /dev/null
+from .slowo_obce import SlowoObce
+from .tytul_dziela import TytulDziela
+from .wyroznienie import Wyroznienie
--- /dev/null
+from ..base import WLElement
+
+
+class SlowoObce(WLElement):
+ """Element class registered in WL_ELEMENTS for "slowo_obce"; uses WLElement defaults."""
+ pass
--- /dev/null
+# -*- coding: utf-8
+from ..base import WLElement
+
+
+class TytulDziela(WLElement):
+    """Title-of-work element; adds quotation marks when ``typ`` is "1"."""
+
+    def normalize_text(self, text):
+        """Normalize ``text`` and, for ``typ="1"``, wrap it in „…” quotes."""
+        normalized = super(TytulDziela, self).normalize_text(text)
+        if self.attrib.get('typ') != '1':
+            return normalized
+        return '„{txt}”'.format(txt=normalized)
--- /dev/null
+from ..base import WLElement
+
+
+class Wyroznienie(WLElement):
+ """Emphasis element; WL_ELEMENTS also maps several other inline tags to it."""
+ # In text output the content is wrapped in asterisks.
+ TXT_PREFIX = "*"
+ TXT_SUFFIX = "*"
+
--- /dev/null
+from .begin import Begin
+from .end import End
+from .motyw import Motyw
--- /dev/null
+from ..base import WLElement
+
+
+class Begin(WLElement):
+ """Element class registered in WL_ELEMENTS for "begin"; uses WLElement defaults."""
+ pass
--- /dev/null
+from ..base import WLElement
+
+
+class End(WLElement):
+ """Element class registered in WL_ELEMENTS for "end"; uses WLElement defaults."""
+ pass
--- /dev/null
+from ..base import WLElement
+
+
+class Motyw(WLElement):
+    """Theme marker element; left out of text output."""
+
+    def txt_build(self, builder):
+        """Emit nothing: the element and its content are skipped in text."""
+        pass
+
+    def feed_to(self, builder):
+        """Pass this marker's comma-separated theme names to ``builder``.
+
+        The element must have no child elements. Each name is stripped and
+        normalized, then handed to ``builder.set_themes`` together with
+        the element's ``id`` attribute.
+        """
+        assert not len(self)
+        # normalize_text is a method of the element; calling it as a bare
+        # name raised NameError.
+        themes = [
+            self.normalize_text(t.strip()) for t in self.text.split(',')
+        ]
+        builder.set_themes(self.attrib['id'], themes)
import six
+from .elements import WL_ELEMENTS
+
+
+class WLElementLookup(etree.CustomElementClassLookup):
+    """Element class lookup that maps un-namespaced tags through WL_ELEMENTS."""
+
+    def lookup(self, node_type, document, namespace, name):
+        """Return the class for tag ``name``.
+
+        Returns None for non-element nodes and for namespaced elements.
+        A tag missing from WL_ELEMENTS raises KeyError.
+        """
+        if node_type != 'element' or namespace:
+            return None
+        return WL_ELEMENTS[name]
+
+
+# Shared XML parser whose elements are instantiated through WLElementLookup.
+parser = etree.XMLParser()
+parser.set_element_class_lookup(
+ WLElementLookup()
+)
+
+
+
class WLDocument(object):
+ """Legacy class, to be replaced with documents.WLDocument."""
LINE_SWAP_EXPR = re.compile(r'/\s', re.MULTILINE | re.UNICODE)
provider = None
from __future__ import unicode_literals
from librarian import NoDublinCore
-from librarian.parser import WLDocument
+from librarian.parser import WLDocument as LegacyWLDocument
+from librarian.document import WLDocument
from nose.tools import *
from .utils import get_fixture
-def test_transform():
+def test_transform_legacy():
expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
- text = WLDocument.from_file(
+ text = LegacyWLDocument.from_file(
get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
).as_text().get_bytes()
assert_equal(text, open(expected_output_file_path, 'rb').read())
+def test_transform():
+    """The 'txt' builder of the new WLDocument reproduces the expected fixture."""
+    expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected.txt')
+
+    text = WLDocument(
+        filename=get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
+    ).build('txt').get_bytes()
+
+    # Read the fixture in a context manager so the file handle is closed.
+    with open(expected_output_file_path, 'rb') as expected_file:
+        expected = expected_file.read()
+
+    assert_equal(text, expected)
+
+
def test_transform_raw():
expected_output_file_path = get_fixture('text', 'asnyk_miedzy_nami_expected_raw.txt')
- text = WLDocument.from_file(
+ text = LegacyWLDocument.from_file(
get_fixture('text', 'miedzy-nami-nic-nie-bylo.xml')
).as_text(flags=['raw-text']).get_bytes()
@raises(NoDublinCore)
def test_no_dublincore():
- WLDocument.from_file(
+ LegacyWLDocument.from_file(
get_fixture('text', 'asnyk_miedzy_nami_nodc.xml')
).as_text()
def test_passing_parse_dublincore_to_transform():
"""Passing parse_dublincore=False to the constructor omits DublinCore parsing."""
- WLDocument.from_file(
+ LegacyWLDocument.from_file(
get_fixture('text', 'asnyk_miedzy_nami_nodc.xml'),
parse_dublincore=False,
).as_text()
[tox]
envlist =
clean,
- py{27,35,36,37},
+ py{27,35,36,37,38},
stats
[testenv]