# -*- coding: utf-8 -*-
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import re

from lxml import etree


def _register_function(f):
    """Register extension function with lxml.

    The function is stored in the 'http://wolnelektury.pl/functions'
    function namespace under its own ``__name__``.
    """
    ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
    ns[f.__name__] = f


def reg_substitute_entities():
    """Register the ``substitute_entities`` XPath extension function."""
    # Order matters: longer sequences must be replaced before their
    # prefixes (u'---' before u'--'), otherwise the shorter pattern would
    # consume part of the longer one.
    ENTITY_SUBSTITUTIONS = [
        (u'---', u'—'),
        (u'--', u'–'),
        (u'...', u'…'),
        (u',,', u'„'),
        (u'"', u'”'),
    ]

    def substitute_entities(context, text):
        """XPath extension function converting all entities in passed text.

        ``text`` may be a string or a list of strings; a list is
        concatenated first. Returns the text with every sequence listed in
        ENTITY_SUBSTITUTIONS replaced by its typographic counterpart.
        """
        if isinstance(text, list):
            text = ''.join(text)
        for entity, substitution in ENTITY_SUBSTITUTIONS:
            text = text.replace(entity, substitution)
        return text

    _register_function(substitute_entities)


def reg_strip():
    """Register the ``strip`` XPath extension function."""
    def strip(context, text):
        """Normalize whitespace in the passed text.

        ``text`` may be a string or a list of strings; a list is
        concatenated first. Every run of whitespace is collapsed into a
        single space and whitespace at the beginning and end is removed.
        """
        if isinstance(text, list):
            text = ''.join(text)
        return re.sub(r'\s+', ' ', text).strip()
    _register_function(strip)


def reg_wrap_words():
    """Register the ``wrap_words`` XPath extension function."""
    def wrap_words(context, text, wrapping):
        """XPath extension function automatically wrapping words in text.

        ``text`` may be a string or a list of strings; a list is
        concatenated first. ``wrapping`` is the line length limit; a falsy
        value (e.g. 0) disables wrapping and the text is returned as is.

        The text is split on every single whitespace character, so
        consecutive whitespace yields empty "words" that are kept and
        joined back with spaces. Words are never broken: a word longer
        than the limit is placed on a line of its own.
        """
        if isinstance(text, list):
            text = ''.join(text)
        if not wrapping:
            return text

        words = re.split(r'\s', text)

        # NOTE(review): until the first reset below the counter includes one
        # trailing separator per word, so the first line holds at most
        # ``wrapping - 1`` characters while later lines may hold
        # ``wrapping``. Left unchanged to keep existing output stable.
        line_length = 0
        lines = [[]]
        for word in words:
            line_length += len(word) + 1
            if line_length > wrapping:
                # Max line length was exceeded, so this word opens a new
                # line. When the current line is still empty (the very
                # first word is already too long) it is reused, so that the
                # result does not start with a spurious blank line.
                if lines[-1]:
                    lines.append([])
                line_length = len(word)
            lines[-1].append(word)
        return '\n'.join(' '.join(line) for line in lines)
    _register_function(wrap_words)