X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/223fd8f247b4a588d263afaf798dca4cb9ffa639..a1eee7c004e2068b705bb4475633f76f2e38b4c4:/librarian/functions.py?ds=sidebyside diff --git a/librarian/functions.py b/librarian/functions.py index 6d52b84..e5a47d6 100644 --- a/librarian/functions.py +++ b/librarian/functions.py @@ -3,17 +3,23 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from __future__ import unicode_literals + from lxml import etree import re +from librarian.dcparser import Person +from librarian import get_resource + + def _register_function(f): """ Register extension function with lxml """ ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') ns[f.__name__] = f -def reg_substitute_entities(): - ENTITY_SUBSTITUTIONS = [ +def reg_substitute_entities(): + entity_substitutions = [ (u'---', u'—'), (u'--', u'–'), (u'...', u'…'), @@ -25,7 +31,7 @@ def reg_substitute_entities(): """XPath extension function converting all entites in passed text.""" if isinstance(text, list): text = ''.join(text) - for entity, substitutution in ENTITY_SUBSTITUTIONS: + for entity, substitutution in entity_substitutions: text = text.replace(entity, substitutution) return text @@ -41,6 +47,26 @@ def reg_strip(): _register_function(strip) +def reg_starts_white(): + def starts_white(context, text): + if isinstance(text, list): + text = ''.join(text) + if not text: + return False + return text[0].isspace() + _register_function(starts_white) + + +def reg_ends_white(): + def ends_white(context, text): + if isinstance(text, list): + text = ''.join(text) + if not text: + return False + return text[-1].isspace() + _register_function(ends_white) + + def reg_wrap_words(): def wrap_words(context, text, wrapping): """XPath extension function automatically wrapping words in passed text""" @@ -48,9 +74,9 @@ def reg_wrap_words(): text = ''.join(text) if not wrapping: return text - + words = re.split(r'\s', text) - + line_length = 0 lines = [[]] for word in words: @@ -63,3 +89,62 @@ def reg_wrap_words(): return '\n'.join(' '.join(line) for line in lines) _register_function(wrap_words) + +def reg_person_name(): + def person_name(context, text): + """ Converts "Name, Forename" to "Forename Name" """ + if isinstance(text, list): + text = ''.join(text) + return Person.from_text(text).readable() + _register_function(person_name) + + +def reg_texcommand(): + def texcommand(context, text): + """Remove non-letters""" + if isinstance(text, list): + text = ''.join(text) + return re.sub(r'[^a-zA-Z]', '', text).strip() + _register_function(texcommand) + + +def reg_lang_code_3to2(): + def lang_code_3to2(context, text): + """Convert 3-letter language code to 2-letter code""" + result = '' + text = ''.join(text) + with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f: + for line in f.read().decode('latin1').split('\n'): + list = line.strip().split('|') + if list[0] == text: + result = list[2] + if result == '': + return text + else: + return result + _register_function(lang_code_3to2) + + +def mathml_latex(context, trees): + from librarian.embeds.mathml import MathML + text = MathML(trees[0]).to_latex().data + # Remove invisible multiplications, they produce unwanted spaces. + text = text.replace(u'\u2062', '') + return text + + +def reg_mathml_latex(): + _register_function(mathml_latex) + + +def reg_mathml_epub(zipf): + from librarian.embeds.mathml import MathML + + def mathml(context, trees): + data = MathML(trees[0]).to_latex().to_png().data + name = "math%d.png" % mathml.count + mathml.count += 1 + zipf.writestr('OPS/' + name, data) + return name + mathml.count = 0 + _register_function(mathml)