# Recovered from a whitespace-collapsed unified diff of librarian/functions.py
# (post-patch side of the change).
# NOTE(review): the diff hunks show only fragments of reg_substitute_entities,
# reg_strip and reg_wrap_words, so those three are NOT reproduced here --
# take them unchanged from the full file.
from lxml import etree
import re

from librarian.dcparser import Person


def _register_function(f):
    """ Register extension function with lxml

    The function is stored in the 'http://wolnelektury.pl/functions'
    namespace under its own ``__name__``.
    """
    ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
    ns[f.__name__] = f


def _as_text(value):
    """Join a list argument into a single string; pass anything else through.

    Every extension function below applies this same coercion to its
    ``text`` argument before working on it.
    """
    if isinstance(value, list):
        return ''.join(value)
    return value


def reg_starts_white():
    """Register the ``starts_white`` extension function."""
    def starts_white(context, text):
        """Return True if text begins with a whitespace character.

        Empty input yields False.
        """
        text = _as_text(text)
        return bool(text) and text[0].isspace()
    _register_function(starts_white)


def reg_ends_white():
    """Register the ``ends_white`` extension function."""
    def ends_white(context, text):
        """Return True if text ends with a whitespace character.

        Empty input yields False.
        """
        text = _as_text(text)
        return bool(text) and text[-1].isspace()
    _register_function(ends_white)


def reg_person_name():
    """Register the ``person_name`` extension function."""
    def person_name(context, text):
        """ Converts "Name, Forename" to "Forename Name" """
        return Person.from_text(_as_text(text)).readable()
    _register_function(person_name)


def reg_texcommand():
    """Register the ``texcommand`` extension function."""
    def texcommand(context, text):
        """Remove non-letters (everything outside a-z and A-Z)."""
        # No strip() needed: once every non-letter is removed there is no
        # whitespace left to trim.
        return re.sub(r'[^a-zA-Z]', '', _as_text(text))
    _register_function(texcommand)


def reg_get(format_):
    """Register the ``get`` extension function bound to ``format_``.

    ``get(context, *args)`` starts at ``format_`` and follows ``args`` one
    step at a time: each step is tried as an attribute first and as an
    item lookup (``obj[arg]``) second.  The object reached last is returned.

    Raises AttributeError when a step is neither an attribute nor an item.
    """
    def get(context, *args):
        obj = format_
        for arg in args:
            if hasattr(obj, arg):
                obj = getattr(obj, arg)
                continue
            try:
                obj = obj[arg]
            except (TypeError, KeyError):
                # Not an item either: repeat the attribute lookup so the
                # failure surfaces as a proper AttributeError.
                getattr(obj, arg)
        return obj
    _register_function(get)