X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/167d19bdfe76a77754d2e14a80ac0c25c725c379..b78837615c4598c3da8355073f5a0efe9951f86a:/librarian/functions.py diff --git a/librarian/functions.py b/librarian/functions.py index 8427ba8..bd05ff4 100644 --- a/librarian/functions.py +++ b/librarian/functions.py @@ -6,53 +6,60 @@ from lxml import etree import re +from librarian.dcparser import Person + def _register_function(f): """ Register extension function with lxml """ ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') - ns[f.__name__] = f + ns[f.__name__] = lambda context, *args: f(*args) -def reg_substitute_entities(): - ENTITY_SUBSTITUTIONS = [ - (u'---', u'—'), - (u'--', u'–'), - (u'...', u'…'), - (u',,', u'„'), - (u'"', u'”'), - ] +ENTITY_SUBSTITUTIONS = [ + (u'---', u'—'), + (u'--', u'–'), + (u'...', u'…'), + (u',,', u'„'), + (u'"', u'”'), +] - def substitute_entities(context, text): - """XPath extension function converting all entites in passed text.""" - if isinstance(text, list): - text = ''.join(text) - for entity, substitutution in ENTITY_SUBSTITUTIONS: - text = text.replace(entity, substitutution) - return text +def substitute_entities(text): + """XPath extension function converting all entites in passed text.""" + if isinstance(text, list): + text = ''.join(text) + for entity, substitutution in ENTITY_SUBSTITUTIONS: + text = text.replace(entity, substitutution) + return text + +def reg_substitute_entities(): _register_function(substitute_entities) +def strip(text): + """Remove unneeded whitespace from beginning and end""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'\s+', ' ', text).strip() + + def reg_strip(): - def strip(context, text): - """Remove unneeded whitespace from beginning and end""" - if isinstance(text, list): - text = ''.join(text) - return re.sub(r'\s+', ' ', text).strip() _register_function(strip) +def starts_white(text): + if isinstance(text, list): + text = ''.join(text) + if not text: + return False + return text[0].isspace() + + def reg_starts_white(): - def starts_white(context, text): - if isinstance(text, list): - text = ''.join(text) - if not text: - return False - return text[0].isspace() _register_function(starts_white) def reg_ends_white(): - def ends_white(context, text): + def ends_white(text): if isinstance(text, list): text = ''.join(text) if not text: @@ -61,35 +68,64 @@ def reg_ends_white(): _register_function(ends_white) +def wrap_words(text, wrapping): + """XPath extension function automatically wrapping words in passed text""" + if isinstance(text, list): + text = ''.join(text) + if not wrapping: + return text + + words = re.split(r'\s', text) + + line_length = 0 + lines = [[]] + for word in words: + line_length += len(word) + 1 + if line_length > wrapping: + # Max line length was exceeded. We create new line + lines.append([]) + line_length = len(word) + lines[-1].append(word) + return '\n'.join(' '.join(line) for line in lines) + + def reg_wrap_words(): - def wrap_words(context, text, wrapping): - """XPath extension function automatically wrapping words in passed text""" - if isinstance(text, list): - text = ''.join(text) - if not wrapping: - return text - - words = re.split(r'\s', text) - - line_length = 0 - lines = [[]] - for word in words: - line_length += len(word) + 1 - if line_length > wrapping: - # Max line length was exceeded. We create new line - lines.append([]) - line_length = len(word) - lines[-1].append(word) - return '\n'.join(' '.join(line) for line in lines) _register_function(wrap_words) +def person_name(text): + """ Converts "Name, Forename" to "Forename Name" """ + if isinstance(text, list): + text = ''.join(text) + return Person.from_text(text).readable() + + def reg_person_name(): - def person_name(context, text): - """ Converts "Name, Forename" to "Forename Name" """ - if isinstance(text, list): - text = ''.join(text) - return ' '.join([t.strip() for t in text.split(',', 1)[::-1]]) _register_function(person_name) +def texcommand(text): + """Remove non-letters""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'[^a-zA-Z]', '', text).strip() + + +def reg_texcommand(): + _register_function(texcommand) + + +def reg_get(format_): + def get(*args): + obj = format_ + for arg in args: + if hasattr(obj, arg): + obj = getattr(obj, arg) + else: + try: + obj = obj[arg] + except (TypeError, KeyError), e: + # Just raise proper AttributeError. + getattr(obj, arg) + return obj + _register_function(get)