import re


def _join_text(text):
    """Return *text* as one string, concatenating it first if it is a list."""
    if isinstance(text, list):
        return ''.join(text)
    return text


def reg_texcommand():
    """Define ``texcommand`` and hand it to ``_register_function``."""

    def texcommand(context, text):
        """Return *text* with every character outside a-z / A-Z removed.

        ``context`` is accepted but not used.  *text* may be a string or a
        list of strings (a list is concatenated first).
        """
        # The substitution already drops whitespace, so no strip() is needed.
        return re.sub(r'[^a-zA-Z]', '', _join_text(text))
    _register_function(texcommand)


def reg_urlquote():
    """Define ``urlquote`` and hand it to ``_register_function``."""
    # ``quote`` lives in ``urllib.parse`` on Python 3 and in ``urllib`` on
    # Python 2; resolve it once, when the function is registered.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    def urlquote(context, text):
        """Percent-encode *text* (as UTF-8), leaving ``/`` and ``:`` intact.

        ``context`` is accepted but not used.  *text* may be a string or a
        list of strings (a list is concatenated first).
        """
        return quote(_join_text(text).encode('utf-8'), safe="/:")
    _register_function(urlquote)


def reg_breakurl():
    """Define ``breakurl`` and hand it to ``_register_function``."""
    from lxml import etree

    def breakurl(context, text):
        """Wrap a URL in a ``<span>`` with a break hint after every slash.

        ``context`` is accepted but not used.  *text* may be a string or a
        list of strings (a list is concatenated first).

        The returned ``<span>`` holds a ``<span>`` with the part before the
        first ``/``; then, for each further ``/``-separated part, a
        ``<span>`` containing "/", followed by
        ``<cmd name="linebreak"><opt>1</opt></cmd>`` whose tail is that part.
        NOTE(review): the ``cmd``/``opt`` pair presumably becomes an optional
        TeX line break downstream -- confirm against the stylesheet.
        """
        chunks = _join_text(text).split("/")
        ret = etree.Element("span")
        etree.SubElement(ret, "span").text = chunks[0]
        for chunk in chunks[1:]:
            # Set the slash as element text only: extra keyword arguments to
            # Element()/SubElement() become XML attributes, so passing
            # text="/" there would emit a stray text="/" attribute.
            etree.SubElement(ret, "span").text = "/"
            linebreak = etree.SubElement(ret, "cmd", {"name": "linebreak"})
            etree.SubElement(linebreak, "opt").text = "1"
            # The URL part is the tail of <cmd>, i.e. it follows the hint.
            linebreak.tail = chunk
        return ret
    _register_function(breakurl)