From: Radek Czajka Date: Wed, 10 Jun 2020 14:59:42 +0000 (+0200) Subject: Housekeeping. X-Git-Tag: 1.9~6 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/8550d172b829f29b2fcd4723789fb2a5d9fff6eb Housekeeping. --- diff --git a/src/librarian/__init__.py b/src/librarian/__init__.py index 119b6b1..95ea3fe 100644 --- a/src/librarian/__init__.py +++ b/src/librarian/__init__.py @@ -27,20 +27,25 @@ class UnicodeException(Exception): message = six.text_type(args, encoding='utf-8', errors='ignore') return message + class ParseError(UnicodeException): pass + class ValidationError(UnicodeException): pass + class NoDublinCore(ValidationError): """There's no DublinCore section, and it's required.""" pass + class NoProvider(UnicodeException): """There's no DocProvider specified, and it's needed.""" pass + class XMLNamespace(object): '''A handy structure to repsent names in an XML namespace.''' @@ -59,6 +64,7 @@ class XMLNamespace(object): def __str__(self): return '%s' % self.uri + class EmptyNamespace(XMLNamespace): def __init__(self): super(EmptyNamespace, self).__init__('') @@ -66,6 +72,7 @@ class EmptyNamespace(XMLNamespace): def __call__(self, tag): return tag + # some common namespaces we use XMLNS = XMLNamespace('http://www.w3.org/XML/1998/namespace') RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') @@ -85,8 +92,10 @@ class WLURI(object): slug = None example = 'http://wolnelektury.pl/katalog/lektura/template/' - _re_wl_uri = re.compile(r'http://(www\.)?wolnelektury.pl/katalog/lektur[ay]/' - '(?P[-a-z0-9]+)/?$') + _re_wl_uri = re.compile( + r'http://(www\.)?wolnelektury.pl/katalog/lektur[ay]/' + '(?P[-a-z0-9]+)/?$' + ) def __init__(self, uri): uri = six.text_type(uri) @@ -149,37 +158,47 @@ class DirDocProvider(DocProvider): from . import dcparser + DEFAULT_BOOKINFO = dcparser.BookInfo( - { RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template'}, - { DCNS('creator'): [u'Some, Author'], - DCNS('title'): [u'Some Title'], - DCNS('subject.period'): [u'Unknown'], - DCNS('subject.type'): [u'Unknown'], - DCNS('subject.genre'): [u'Unknown'], - DCNS('date'): ['1970-01-01'], - DCNS('language'): [u'pol'], - # DCNS('date'): [creation_date], - DCNS('publisher'): [u"Fundacja Nowoczesna Polska"], - DCNS('description'): - [u"""Publikacja zrealizowana w ramach projektu - Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa - wykonana przez Bibliotekę Narodową z egzemplarza - pochodzącego ze zbiorów BN."""], - DCNS('identifier.url'): [WLURI.example], - DCNS('rights'): - [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] }) + { + RDFNS('about'): u'http://wiki.wolnepodreczniki.pl/Lektury:Template' + }, + { + DCNS('creator'): [u'Some, Author'], + DCNS('title'): [u'Some Title'], + DCNS('subject.period'): [u'Unknown'], + DCNS('subject.type'): [u'Unknown'], + DCNS('subject.genre'): [u'Unknown'], + DCNS('date'): ['1970-01-01'], + DCNS('language'): [u'pol'], + # DCNS('date'): [creation_date], + DCNS('publisher'): [u"Fundacja Nowoczesna Polska"], + DCNS('description'): + [u"""Publikacja zrealizowana w ramach projektu + Wolne Lektury (http://wolnelektury.pl). Reprodukcja cyfrowa + wykonana przez Bibliotekę Narodową z egzemplarza + pochodzącego ze zbiorów BN."""], + DCNS('identifier.url'): [WLURI.example], + DCNS('rights'): + [u"Domena publiczna - zm. [OPIS STANU PRAWNEGO TEKSTU]"] + } +) + def xinclude_forURI(uri): e = etree.Element(XINS("include")) e.set("href", uri) return etree.tostring(e, encoding='unicode') + def wrap_text(ocrtext, creation_date, bookinfo=DEFAULT_BOOKINFO): """Wrap the text within the minimal XML structure with a DC template.""" bookinfo.created_at = creation_date - dcstring = etree.tostring(bookinfo.to_etree(), \ - method='xml', encoding='unicode', pretty_print=True) + dcstring = etree.tostring( + bookinfo.to_etree(), method='xml', encoding='unicode', + pretty_print=True + ) return u'\n' + dcstring + u'\n\n' + ocrtext + \ u'\n\n' @@ -190,18 +209,21 @@ def serialize_raw(element): for child in element.iterchildren(): e = etree.tostring(child, method='xml', encoding='unicode', - pretty_print=True) + pretty_print=True) b += e return b + SERIALIZERS = { 'raw': serialize_raw, } + def serialize_children(element, format='raw'): return SERIALIZERS[format](element) + def get_resource(path): return os.path.join(os.path.dirname(__file__), path) @@ -276,4 +298,6 @@ class OutputFile(object): class URLOpener(FancyURLopener): version = 'FNP Librarian (http://github.com/fnp/librarian)' + + urllib._urlopener = URLOpener() diff --git a/src/librarian/embeds/mathml.py b/src/librarian/embeds/mathml.py index 801c213..16fa75b 100644 --- a/src/librarian/embeds/mathml.py +++ b/src/librarian/embeds/mathml.py @@ -12,13 +12,13 @@ class MathML(TreeEmbed): def to_latex(self): """ >>> print(MathML(etree.fromstring( - 'a < b' - )).to_latex().data.strip()) + ... 'a < b' + ... )).to_latex().data.strip()) a < b >>> print(MathML(etree.fromstring( - '< & &lt; A' - )).to_latex().data.strip()) + ... '< & &lt; A' + ... )).to_latex().data.strip()) < & < A """ diff --git a/src/librarian/epub.py b/src/librarian/epub.py index be9488a..137796e 100644 --- a/src/librarian/epub.py +++ b/src/librarian/epub.py @@ -82,7 +82,9 @@ def inner_xml(node): """ nt = node.text if node.text is not None else '' - return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node]) + return ''.join( + [nt] + [etree.tostring(child, encoding='unicode') for child in node] + ) def set_inner_xml(node, text): @@ -122,7 +124,10 @@ def xslt(xml, sheet, **kwargs): xml = etree.ElementTree(xml) with open(sheet) as xsltf: transform = etree.XSLT(etree.parse(xsltf)) - params = dict((key, transform.strparam(value)) for key, value in kwargs.items()) + params = dict( + (key, transform.strparam(value)) + for key, value in kwargs.items() + ) return transform(xml, **params) @@ -170,11 +175,17 @@ class Stanza(object): Slashes may only occur directly in the stanza. Any slashes in subelements will be ignored, and the subelements will be put inside verse elements. - >>> s = etree.fromstring("a c c/\\nbx/\\nyc/ \\nd") + >>> s = etree.fromstring( + ... "a c c/\\nbx/\\nyc/ \\nd" + ... ) >>> Stanza(s).versify() - >>> print(etree.tostring(s, encoding='unicode')) - a ccbx/ - ycd + >>> print(etree.tostring(s, encoding='unicode', pretty_print=True).strip()) + + a cc + bx/ + yc + d + """ def __init__(self, stanza_elem): @@ -190,7 +201,10 @@ class Stanza(object): tail = self.stanza.tail self.stanza.clear() self.stanza.tail = tail - self.stanza.extend(verse for verse in self.verses if verse.text or len(verse) > 0) + self.stanza.extend( + verse for verse in self.verses + if verse.text or len(verse) > 0 + ) def open_normal_verse(self): self.open_verse = self.stanza.makeelement("wers_normalny") @@ -249,7 +263,10 @@ def add_to_manifest(manifest, partno): def add_to_spine(spine, partno): """ Adds a node to the spine section in content.opf file """ - e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno}) + e = spine.makeelement( + OPFNS('itemref'), + attrib={'idref': 'part%d' % partno} + ) spine.append(e) @@ -348,7 +365,8 @@ def chop(main_text): last_node_part = False - # the below loop are workaround for a problem with epubs in drama ebooks without acts + # The below loop are workaround for a problem with epubs + # in drama ebooks without acts. is_scene = False is_act = False for one_part in main_text: @@ -376,7 +394,10 @@ def chop(main_text): yield part_xml last_node_part = True main_xml_part[:] = [deepcopy(one_part)] - elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): + elif (not last_node_part + and name in ( + "naglowek_rozdzial", "naglowek_akt", "srodtytul" + )): yield part_xml main_xml_part[:] = [deepcopy(one_part)] else: @@ -385,8 +406,12 @@ def chop(main_text): yield part_xml -def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]): - """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """ +def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, + _empty_html_static=[]): + """ + Transforms one chunk, returns a HTML string, a TOC object + and a set of used characters. + """ toc = TOC() for element in chunk_xml[0]: @@ -395,11 +420,13 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"): toc.add(node_name(element), "part%d.html" % chunk_no) elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): - subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False) + subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, + level=1, is_part=False) element.set('sub', str(subnumber)) if empty: if not _empty_html_static: - _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read()) + with open(get_resource('epub/emptyChunk.html')) as f: + _empty_html_static.append(f.read()) chars = set() output_html = _empty_html_static[0] else: @@ -417,7 +444,8 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s def transform(wldoc, verbose=False, style=None, html_toc=False, - sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'): + sample=None, cover=None, flags=None, hyphenate=False, + ilustr_path='', output_type='epub'): """ produces a EPUB file sample=n: generate sample e-book (with at least n paragraphs) @@ -430,7 +458,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, replace_characters(wldoc.edoc.getroot()) - hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None + hyphenator = set_hyph_language( + wldoc.edoc.getroot() + ) if hyphenate else None hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator) # every input file will have a TOC entry, @@ -439,7 +469,8 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, chars = set() if first: # write book title page - html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type) + html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), + outputtype=output_type) chars = used_chars(html_tree.getroot()) html_string = etree.tostring( html_tree, pretty_print=True, xml_declaration=True, @@ -456,15 +487,17 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, chars = set() html_string = open(get_resource('epub/emptyChunk.html')).read() else: - html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl')) + html_tree = xslt(wldoc.edoc, + get_resource('epub/xsltChunkTitle.xsl')) chars = used_chars(html_tree.getroot()) html_string = etree.tostring( html_tree, pretty_print=True, xml_declaration=True, encoding="utf-8", - doctype='' ) - zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string)) + zip.writestr('OPS/part%d.html' % chunk_counter, + squeeze_whitespace(html_string)) add_to_manifest(manifest, chunk_counter) add_to_spine(spine, chunk_counter) chunk_counter += 1 @@ -485,12 +518,16 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if sample <= 0: empty = True else: - sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog')) - chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty) + sample -= len(chunk_xml.xpath( + '//strofa|//akap|//akap_cd|//akap_dialog' + )) + chunk_html, chunk_toc, chunk_chars = transform_chunk( + chunk_xml, chunk_counter, annotations, empty) toc.extend(chunk_toc) chars = chars.union(chunk_chars) - zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html)) + zip.writestr('OPS/part%d.html' % chunk_counter, + squeeze_whitespace(chunk_html)) add_to_manifest(manifest, chunk_counter) add_to_spine(spine, chunk_counter) chunk_counter += 1 @@ -524,18 +561,21 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if document.book_info.thanks: document.edoc.getroot().set('thanks', document.book_info.thanks) - opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) + opf = xslt(document.book_info.to_etree(), + get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) guide = opf.find('.//' + OPFNS('guide')) spine = opf.find('.//' + OPFNS('spine')) - output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False) + output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', + delete=False) zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED) functions.reg_mathml_epub(zip) if os.path.isdir(ilustr_path): - ilustr_elements = set(ilustr.get('src') for ilustr in document.edoc.findall('//ilustr')) + ilustr_elements = set(ilustr.get('src') + for ilustr in document.edoc.findall('//ilustr')) for i, filename in enumerate(os.listdir(ilustr_path)): if filename not in ilustr_elements: continue @@ -543,7 +583,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, zip.write(file_path, os.path.join('OPS', filename)) image_id = 'image%s' % i manifest.append(etree.fromstring( - '' % (image_id, filename, guess_type(file_path)[0]))) + '' % ( + image_id, filename, guess_type(file_path)[0]) + )) # write static elements mime = zipfile.ZipInfo() @@ -590,17 +632,28 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if bound_cover.uses_dc_cover: if document.book_info.cover_by: - document.edoc.getroot().set('data-cover-by', document.book_info.cover_by) + document.edoc.getroot().set('data-cover-by', + document.book_info.cover_by) if document.book_info.cover_source: - document.edoc.getroot().set('data-cover-source', document.book_info.cover_source) + document.edoc.getroot().set('data-cover-source', + document.book_info.cover_source) manifest.append(etree.fromstring( - '')) + '' + )) manifest.append(etree.fromstring( - '' % (cover_name, bound_cover.mime_type()))) + '' % ( + cover_name, bound_cover.mime_type() + ) + )) spine.insert(0, etree.fromstring('')) - opf.getroot()[0].append(etree.fromstring('')) - guide.append(etree.fromstring('')) + opf.getroot()[0].append(etree.fromstring( + '' + )) + guide.append(etree.fromstring( + '' + )) annotations = etree.Element('annotations') @@ -616,10 +669,14 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if html_toc: manifest.append(etree.fromstring( - '')) + '' + )) spine.append(etree.fromstring( '')) - guide.append(etree.fromstring('')) + guide.append(etree.fromstring( + '' + )) toc, chunk_counter, chars, sample = transform_file(document, sample=sample) @@ -630,7 +687,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, if len(annotations) > 0: toc.add("Przypisy", "annotations.html") manifest.append(etree.fromstring( - '')) + '' + )) spine.append(etree.fromstring( '')) replace_by_verse(annotations) @@ -645,7 +704,9 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, toc.add("Wesprzyj Wolne Lektury", "support.html") manifest.append(etree.fromstring( - '')) + '' + )) spine.append(etree.fromstring( '')) html_string = open(get_resource('epub/support.html'), 'rb').read() @@ -654,10 +715,13 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, toc.add("Strona redakcyjna", "last.html") manifest.append(etree.fromstring( - '')) + '' + )) spine.append(etree.fromstring( '')) - html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type) + html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), + outputtype=output_type) chars.update(used_chars(html_tree.getroot())) zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring( html_tree, pretty_print=True, xml_declaration=True, @@ -674,8 +738,10 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, except OSError: cwd = None - os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) - for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': + os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'font-optimizer')) + for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', + 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'): optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), get_resource('fonts/' + fname), @@ -686,17 +752,22 @@ def transform(wldoc, verbose=False, style=None, html_toc=False, subprocess.check_call(optimizer_call, env=env) else: dev_null = open(os.devnull, 'w') - subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env) + subprocess.check_call(optimizer_call, stdout=dev_null, + stderr=dev_null, env=env) zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) manifest.append(etree.fromstring( - '' % (fname, fname))) + '' + % (fname, fname) + )) rmtree(tmpdir) if cwd is not None: os.chdir(cwd) zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True, xml_declaration=True, encoding="utf-8")) title = document.book_info.title - attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber" + attributes = ("dtb:uid", "dtb:depth", "dtb:totalPageCount", + "dtb:maxPageNumber") for st in attributes: meta = toc_file.makeelement(NCXNS('meta')) meta.set('name', st) diff --git a/src/librarian/parser.py b/src/librarian/parser.py index 6cce0f7..2bb9509 100644 --- a/src/librarian/parser.py +++ b/src/librarian/parser.py @@ -33,16 +33,21 @@ class WLDocument(object): dc_path = './/' + RDFNS('RDF') if root_elem.tag != 'utwor': - raise ValidationError("Invalid root element. Found '%s', should be 'utwor'" % root_elem.tag) + raise ValidationError( + "Invalid root element. Found '%s', should be 'utwor'" + % root_elem.tag + ) if parse_dublincore: self.rdf_elem = root_elem.find(dc_path) if self.rdf_elem is None: - raise NoDublinCore("Document must have a '%s' element." % RDFNS('RDF')) + raise NoDublinCore( + "Document must have a '%s' element." % RDFNS('RDF') + ) self.book_info = dcparser.BookInfo.from_element( - self.rdf_elem, fallbacks=meta_fallbacks, strict=strict) + self.rdf_elem, fallbacks=meta_fallbacks, strict=strict) else: self.book_info = None @@ -103,7 +108,9 @@ class WLDocument(object): if self.book_info is None: raise NoDublinCore('No Dublin Core in document.') for part_uri in self.book_info.parts: - yield self.from_file(self.provider.by_uri(part_uri), provider=self.provider) + yield self.from_file( + self.provider.by_uri(part_uri), provider=self.provider + ) def chunk(self, path): # convert the path to XPath @@ -150,7 +157,9 @@ class WLDocument(object): try: xpath = self.path_to_xpath(key) node = self.edoc.xpath(xpath)[0] - repl = etree.fromstring(u"<%s>%s" % (node.tag, data, node.tag)) + repl = etree.fromstring( + "<%s>%s" % (node.tag, data, node.tag) + ) node.getparent().replace(node, repl) except Exception as e: unmerged.append(repr((key, xpath, e))) @@ -160,8 +169,9 @@ class WLDocument(object): def clean_ed_note(self, note_tag='nota_red'): """ deletes forbidden tags from nota_red """ - for node in self.edoc.xpath('|'.join('//%s//%s' % (note_tag, tag) for tag in - ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))): + for node in self.edoc.xpath('|'.join( + '//%s//%s' % (note_tag, tag) for tag in + ('pa', 'pe', 'pr', 'pt', 'begin', 'end', 'motyw'))): tail = node.tail node.clear() node.tag = 'span' @@ -174,7 +184,8 @@ class WLDocument(object): """ if self.book_info is None: raise NoDublinCore('No Dublin Core in document.') - persons = set(self.book_info.editors + self.book_info.technical_editors) + persons = set(self.book_info.editors + + self.book_info.technical_editors) for child in self.parts(): persons.update(child.editors()) if None in persons: @@ -218,11 +229,16 @@ class WLDocument(object): from librarian import pdf return pdf.transform(self, *args, **kwargs) - def save_output_file(self, output_file, output_path=None, output_dir_path=None, make_author_dir=False, ext=None): + def save_output_file(self, output_file, output_path=None, + output_dir_path=None, make_author_dir=False, + ext=None): if output_dir_path: save_path = output_dir_path if make_author_dir: - save_path = os.path.join(save_path, six.text_type(self.book_info.author).encode('utf-8')) + save_path = os.path.join( + save_path, + six.text_type(self.book_info.author).encode('utf-8') + ) save_path = os.path.join(save_path, self.book_info.url.slug) if ext: save_path += '.%s' % ext diff --git a/src/librarian/picture.py b/src/librarian/picture.py index eeb8e8e..93d8cb9 100644 --- a/src/librarian/picture.py +++ b/src/librarian/picture.py @@ -181,7 +181,9 @@ class WLPicture(object): return [[0, 0], [-1, -1]] def has_all_props(node, props): - return reduce(and_, map(lambda prop: prop in node.attrib, props)) + return six.moves.reduce( + and_, map(lambda prop: prop in node.attrib, props) + ) if not has_all_props(area, ['x1', 'x2', 'y1', 'y2']): return None diff --git a/src/librarian/res/text/template.txt b/src/librarian/res/text/template.txt new file mode 100644 index 0000000..fa1429c --- /dev/null +++ b/src/librarian/res/text/template.txt @@ -0,0 +1,12 @@ +%(text)s + + +----- +Ta lektura, podobnie jak tysiące innych, dostępna jest na stronie wolnelektury.pl. +Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %(url)s. + +Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska. + +%(license_description)s.%(source)s%(publisher)s + +%(description)s%(contributors)s%(funders)s%(isbn)s diff --git a/src/librarian/text.py b/src/librarian/text.py index d0531a4..8e3960d 100644 --- a/src/librarian/text.py +++ b/src/librarian/text.py @@ -6,7 +6,7 @@ from __future__ import unicode_literals import copy -from librarian import functions, OutputFile +from librarian import functions, OutputFile, get_resource from lxml import etree import os import six @@ -17,20 +17,9 @@ functions.reg_wrap_words() functions.reg_strip() functions.reg_person_name() -TEMPLATE = u"""\ -%(text)s - ------ -Ta lektura, podobnie jak tysiące innych, dostępna jest na stronie wolnelektury.pl. -Wersja lektury w opracowaniu merytorycznym i krytycznym (przypisy i motywy) dostępna jest na stronie %(url)s. - -Utwór opracowany został w ramach projektu Wolne Lektury przez fundację Nowoczesna Polska. - -%(license_description)s.%(source)s%(publisher)s - -%(description)s%(contributors)s%(funders)s%(isbn)s -""" +with open(get_resource("res/text/template.txt")) as f: + TEMPLATE = f.read() def transform(wldoc, flags=None, **options): diff --git a/src/librarian/util.py b/src/librarian/util.py index c302084..5c9fbc2 100644 --- a/src/librarian/util.py +++ b/src/librarian/util.py @@ -1,10 +1,11 @@ -# Functions to convert between integers and Roman numerals. Doctest examples included. -# by Paul Winkler +# Functions to convert between integers and Roman numerals. +# by Paul Winkler # http://code.activestate.com/recipes/81611-roman-numerals/ # PSFL (GPL compatible) from __future__ import print_function, unicode_literals import os +import six def int_to_roman(input): @@ -51,12 +52,13 @@ def int_to_roman(input): >>> print(int_to_roman(1999)) MCMXCIX """ - if type(input) != type(1): + if not isinstance(input, int): raise TypeError("expected integer, got %s" % type(input)) if not 0 < input < 4000: raise ValueError("Argument must be between 1 and 3999") ints = (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1) - nums = ('M', 'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I') + nums = ('M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', + 'I') result = "" for i in range(len(ints)): count = int(input / ints[i]) @@ -64,10 +66,11 @@ def int_to_roman(input): input -= ints[i] * count return result + def roman_to_int(input): """ Convert a roman numeral to an integer. - + >>> r = list(range(1, 4000)) >>> nums = [int_to_roman(i) for i in r] >>> ints = [roman_to_int(n) for n in nums] @@ -91,21 +94,21 @@ def roman_to_int(input): ... ValueError: input is not a valid roman numeral: IL """ - if type(input) != type(""): + if not isinstance(input, six.text_type): raise TypeError("expected string, got %s" % type(input)) input = input.upper() nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I'] ints = [1000, 500, 100, 50, 10, 5, 1] places = [] for c in input: - if not c in nums: + if c not in nums: raise ValueError("input is not a valid roman numeral: %s" % input) for i in range(len(input)): c = input[i] value = ints[nums.index(c)] # If the next place holds a larger number, this value is negative. try: - nextvalue = ints[nums.index(input[i +1])] + nextvalue = ints[nums.index(input[i + 1])] if nextvalue > value: value *= -1 except IndexError: @@ -113,7 +116,8 @@ def roman_to_int(input): pass places.append(value) sum = 0 - for n in places: sum += n + for n in places: + sum += n # Easiest test for validity... if int_to_roman(sum) == input: return sum