# -*- coding: utf-8 -*-
"""XPath extension functions for lxml.

Each ``reg_*`` function registers one helper in the
``http://wolnelektury.pl/functions`` function namespace.  Every helper
takes the XPath evaluation context as its first argument and accepts its
text argument either as a string or as a list of strings, which is
joined before processing.
"""
import re


def _as_text(text):
    """Return *text* as one string, joining it first if it is a list."""
    if isinstance(text, list):
        return ''.join(text)
    return text


def _register_function(f):
    """Register extension function with lxml.

    The function is stored under its own ``__name__`` in the
    ``http://wolnelektury.pl/functions`` namespace, replacing any
    function previously registered under that name.
    """
    # Imported lazily so the pure-text helpers in this module stay
    # importable without lxml installed.
    from lxml import etree

    ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
    ns[f.__name__] = f


# Applied in order: '---' must be replaced before '--', otherwise an
# em dash source would be turned into an en dash followed by '-'.
ENTITY_SUBSTITUTIONS = [
    (u'---', u'—'),
    (u'--', u'–'),
    (u'...', u'…'),
    (u',,', u'„'),
    (u'"', u'”'),
]


def substitute_entities(context, text):
    """XPath extension function converting all entities in passed text."""
    text = _as_text(text)
    for entity, substitution in ENTITY_SUBSTITUTIONS:
        text = text.replace(entity, substitution)
    return text


def reg_substitute_entities():
    """Register ``substitute_entities`` with lxml."""
    _register_function(substitute_entities)


def strip(context, text):
    """Collapse every whitespace run to one space and trim both ends."""
    return re.sub(r'\s+', ' ', _as_text(text)).strip()


def reg_strip():
    """Register ``strip`` with lxml."""
    _register_function(strip)


def starts_white(context, text):
    """Return True if text begins with whitespace; False for empty text."""
    text = _as_text(text)
    if not text:
        return False
    return text[0].isspace()


def reg_starts_white():
    """Register ``starts_white`` with lxml."""
    _register_function(starts_white)


# NOTE(review): reg_ends_white() exists in the original module, but the
# body of its nested ``ends_white`` helper is not visible in the source
# this was rebuilt from, so it is not reproduced here -- restore it from
# the repository rather than guessing.


def wrap_words(context, text, wrapping):
    """XPath extension function automatically wrapping words in passed text.

    The text is split on single whitespace characters and the words are
    re-joined with spaces, starting a new line whenever the running
    length would exceed ``wrapping``.  A falsy ``wrapping`` returns the
    (joined) text unchanged.
    """
    text = _as_text(text)
    if not wrapping:
        return text

    words = re.split(r'\s', text)

    # NOTE(review): the counter starts at 0 and adds len(word) + 1 for the
    # first word, but is reset to len(word) after a break, so the first
    # line is effectively one character narrower than the following ones
    # (and an over-long first word leaves an empty first line).  Kept
    # as-is because changing it would alter existing output.
    line_length = 0
    lines = [[]]
    for word in words:
        line_length += len(word) + 1
        if line_length > wrapping:
            # Max line length was exceeded. We create new line
            lines.append([])
            line_length = len(word)
        lines[-1].append(word)
    return '\n'.join(' '.join(line) for line in lines)


def reg_wrap_words():
    """Register ``wrap_words`` with lxml."""
    _register_function(wrap_words)


def person_name(context, text):
    """Convert "Name, Forename" to "Forename Name"."""
    # Imported lazily: only this helper needs the metadata parser.
    from librarian.dcparser import Person

    return Person.from_text(_as_text(text)).readable()


def reg_person_name():
    """Register ``person_name`` with lxml."""
    _register_function(person_name)


def texcommand(context, text):
    """Remove non-letters (everything outside a-z and A-Z)."""
    return re.sub(r'[^a-zA-Z]', '', _as_text(text)).strip()


def reg_texcommand():
    """Register ``texcommand`` with lxml."""
    _register_function(texcommand)


def reg_get(format_):
    """Register a ``get`` function that looks values up on ``format_``.

    ``get(context, *args)`` starts from ``format_`` and resolves each
    argument in turn, first as an attribute and otherwise as an item
    (``obj[arg]``).  If neither works, AttributeError is raised.
    """
    def get(context, *args):
        obj = format_
        for arg in args:
            if hasattr(obj, arg):
                obj = getattr(obj, arg)
            else:
                try:
                    obj = obj[arg]
                except (TypeError, KeyError):
                    # Just raise proper AttributeError.
                    getattr(obj, arg)
        return obj
    _register_function(get)