X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/167d19bdfe76a77754d2e14a80ac0c25c725c379..803cd9d0e8aeac8b4bacf197dfae80815367ec36:/librarian/functions.py diff --git a/librarian/functions.py b/librarian/functions.py index 8427ba8..523b3d5 100644 --- a/librarian/functions.py +++ b/librarian/functions.py @@ -6,13 +6,15 @@ from lxml import etree import re +from librarian.dcparser import Person + def _register_function(f): """ Register extension function with lxml """ ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') ns[f.__name__] = f -def reg_substitute_entities(): +def reg_substitute_entities(): ENTITY_SUBSTITUTIONS = [ (u'---', u'—'), (u'--', u'–'), @@ -68,9 +70,9 @@ def reg_wrap_words(): text = ''.join(text) if not wrapping: return text - + words = re.split(r'\s', text) - + line_length = 0 lines = [[]] for word in words: @@ -89,7 +91,16 @@ def reg_person_name(): """ Converts "Name, Forename" to "Forename Name" """ if isinstance(text, list): text = ''.join(text) - return ' '.join([t.strip() for t in text.split(',', 1)[::-1]]) + return Person.from_text(text).readable() _register_function(person_name) +def reg_texcommand(): + def texcommand(context, text): + """Remove non-letters""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'[^a-zA-Z]', '', text).strip() + _register_function(texcommand) + +