X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/5bc29f19f308610c944d63597962fb3b0f468c54..8be494f3ddda9f45ae3e454ec549e06f0eba9380:/librarian/functions.py diff --git a/librarian/functions.py b/librarian/functions.py index 9490cbb..bd05ff4 100644 --- a/librarian/functions.py +++ b/librarian/functions.py @@ -11,50 +11,55 @@ from librarian.dcparser import Person def _register_function(f): """ Register extension function with lxml """ ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') - ns[f.__name__] = f + ns[f.__name__] = lambda context, *args: f(*args) -def reg_substitute_entities(): - ENTITY_SUBSTITUTIONS = [ - (u'---', u'—'), - (u'--', u'–'), - (u'...', u'…'), - (u',,', u'„'), - (u'"', u'”'), - ] - - def substitute_entities(context, text): - """XPath extension function converting all entites in passed text.""" - if isinstance(text, list): - text = ''.join(text) - for entity, substitutution in ENTITY_SUBSTITUTIONS: - text = text.replace(entity, substitutution) - return text +ENTITY_SUBSTITUTIONS = [ + (u'---', u'—'), + (u'--', u'–'), + (u'...', u'…'), + (u',,', u'„'), + (u'"', u'”'), +] +def substitute_entities(text): + """XPath extension function converting all entites in passed text.""" + if isinstance(text, list): + text = ''.join(text) + for entity, substitutution in ENTITY_SUBSTITUTIONS: + text = text.replace(entity, substitutution) + return text + + +def reg_substitute_entities(): _register_function(substitute_entities) +def strip(text): + """Remove unneeded whitespace from beginning and end""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'\s+', ' ', text).strip() + + def reg_strip(): - def strip(context, text): - """Remove unneeded whitespace from beginning and end""" - if isinstance(text, list): - text = ''.join(text) - return re.sub(r'\s+', ' ', text).strip() _register_function(strip) +def starts_white(text): + if isinstance(text, list): + text = ''.join(text) + if not text: + return False + return text[0].isspace() + + def reg_starts_white(): - def starts_white(context, text): - if isinstance(text, list): - text = ''.join(text) - if not text: - return False - return text[0].isspace() _register_function(starts_white) def reg_ends_white(): - def ends_white(context, text): + def ends_white(text): if isinstance(text, list): text = ''.join(text) if not text: @@ -63,49 +68,55 @@ def reg_ends_white(): _register_function(ends_white) +def wrap_words(text, wrapping): + """XPath extension function automatically wrapping words in passed text""" + if isinstance(text, list): + text = ''.join(text) + if not wrapping: + return text + + words = re.split(r'\s', text) + + line_length = 0 + lines = [[]] + for word in words: + line_length += len(word) + 1 + if line_length > wrapping: + # Max line length was exceeded. We create new line + lines.append([]) + line_length = len(word) + lines[-1].append(word) + return '\n'.join(' '.join(line) for line in lines) + + def reg_wrap_words(): - def wrap_words(context, text, wrapping): - """XPath extension function automatically wrapping words in passed text""" - if isinstance(text, list): - text = ''.join(text) - if not wrapping: - return text - - words = re.split(r'\s', text) - - line_length = 0 - lines = [[]] - for word in words: - line_length += len(word) + 1 - if line_length > wrapping: - # Max line length was exceeded. We create new line - lines.append([]) - line_length = len(word) - lines[-1].append(word) - return '\n'.join(' '.join(line) for line in lines) _register_function(wrap_words) +def person_name(text): + """ Converts "Name, Forename" to "Forename Name" """ + if isinstance(text, list): + text = ''.join(text) + return Person.from_text(text).readable() + + def reg_person_name(): - def person_name(context, text): - """ Converts "Name, Forename" to "Forename Name" """ - if isinstance(text, list): - text = ''.join(text) - return Person.from_text(text).readable() _register_function(person_name) +def texcommand(text): + """Remove non-letters""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'[^a-zA-Z]', '', text).strip() + + def reg_texcommand(): - def texcommand(context, text): - """Remove non-letters""" - if isinstance(text, list): - text = ''.join(text) - return re.sub(r'[^a-zA-Z]', '', text).strip() _register_function(texcommand) def reg_get(format_): - def get(context, *args): + def get(*args): obj = format_ for arg in args: if hasattr(obj, arg):