1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 from __future__ import unicode_literals
10 from ebooklib import epub
12 from librarian.dcparser import Person
13 from librarian import get_resource
# Helper used by the reg_* functions in this file to expose a Python callable
# as an XPath extension function.
# NOTE(review): the listing jumps from 18 to 22, so the statement that stores
# `f` in `ns` is not visible here; the import of `etree` is not visible
# either (presumably lxml's etree, per the docstring) - confirm both against
# the full file.
16 def _register_function(f):
17 """ Register extension function with lxml """
# Obtain the function namespace bound to the wolnelektury.pl functions URI.
18 ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
# Defines the `substitute_entities` extension function and hands it to
# `_register_function`.
# NOTE(review): the entries of `entity_substitutions` (listing lines 24-30),
# the body of the `isinstance` branch (34) and the function's return (37-38)
# are not visible in this excerpt - confirm against the full file.
22 def reg_substitute_entities():
# Sequence of (entity, replacement) pairs; the loop below unpacks each pair.
23 entity_substitutions = [
31 def substitute_entities(context, text):
32 """XPath extension function converting all entites in passed text."""
# `text` may arrive as a list instead of a single string (presumably an
# XPath result list - confirm); the handling of that case is not visible.
33 if isinstance(text, list):
# Replacements are applied one after another in list order, so each pair
# operates on the output of the previous ones.
35 for entity, substitutution in entity_substitutions:
36 text = text.replace(entity, substitutution)
39 _register_function(substitute_entities)
# `strip` extension function plus its registration call.
# NOTE(review): listing lines 40-42 (where an enclosing reg_* header would
# sit) and line 46 (body of the `isinstance` branch) are not visible.
43 def strip(context, text):
44 """Remove unneeded whitespace from beginning and end"""
45 if isinstance(text, list):
# Besides trimming both ends, every internal run of whitespace is collapsed
# to a single space before the final .strip().
47 return re.sub(r'\s+', ' ', text).strip()
48 _register_function(strip)
# Defines and registers `starts_white`, which reports whether the first
# character of `text` is whitespace.
# NOTE(review): listing lines 54-56 are not visible; `text[0]` raises
# IndexError on an empty string unless those lines guard against it.
51 def reg_starts_white():
52 def starts_white(context, text):
53 if isinstance(text, list):
57 return text[0].isspace()
58 _register_function(starts_white)
# `ends_white` extension function plus its registration call; reports whether
# the last character of `text` is whitespace.
# NOTE(review): listing lines 59-61 (where an enclosing reg_* header would
# sit) and 64-66 are not visible; `text[-1]` raises IndexError on an empty
# string unless those lines guard against it.
62 def ends_white(context, text):
63 if isinstance(text, list):
67 return text[-1].isspace()
68 _register_function(ends_white)
# `wrap_words` extension function plus its registration call: re-flows `text`
# into lines limited by `wrapping`, joining words with spaces and lines with
# newlines.
# NOTE(review): listing lines 73, 75-76, 78-81, 83-86 and 90 are not visible
# (docstring delimiters, list handling, initialisation of `lines` and
# `line_length`, the loop header, and the statement that starts a new line).
72 def wrap_words(context, text, wrapping):
74 XPath extension function automatically wrapping words
77 if isinstance(text, list):
# Splits on each single whitespace character, so consecutive whitespace
# characters produce empty-string "words".
82 words = re.split(r'\s', text)
# Each word is counted with one extra character for its separator.
87 line_length += len(word) + 1
88 if line_length > wrapping:
89 # Max line length was exceeded. We create new line
# The running length restarts at the length of the word that overflowed.
91 line_length = len(word)
# The word always goes onto the current last line.
92 lines[-1].append(word)
93 return '\n'.join(' '.join(line) for line in lines)
94 _register_function(wrap_words)
# Defines and registers `person_name`, which parses `text` with
# `Person.from_text` (from librarian.dcparser) and returns the result of its
# `readable()` method.
# NOTE(review): listing line 101 (body of the `isinstance` branch) is not
# visible.
97 def reg_person_name():
98 def person_name(context, text):
99 """ Converts "Name, Forename" to "Forename Name" """
100 if isinstance(text, list):
102 return Person.from_text(text).readable()
103 _register_function(person_name)
# Defines and registers `texcommand`, which reduces `text` to its ASCII
# letters only.
# NOTE(review): listing line 110 (body of the `isinstance` branch) is not
# visible.
106 def reg_texcommand():
107 def texcommand(context, text):
108 """Remove non-letters"""
109 if isinstance(text, list):
# The pattern already deletes every character outside a-z/A-Z, whitespace
# included, so the trailing .strip() has nothing left to remove.
111 return re.sub(r'[^a-zA-Z]', '', text).strip()
112 _register_function(texcommand)
# Looks a language code up in the bundled ISO-639-2 resource file.
# NOTE(review): listing lines 117-118 and everything after 121 (how `codes`
# is matched against `text`, and what is returned) are not visible.
115 def lang_code_3to2(text):
116 """Convert 3-letter language code to 2-letter code"""
# The file is opened in binary mode and decoded explicitly as latin1 below.
119 with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
120 for line in f.read().decode('latin1').split('\n'):
# Each record is a '|'-separated row; surrounding whitespace is dropped first.
121 codes = line.strip().split('|')
# Extension function converting MathML to LaTeX text: only the first element
# of `trees` is used.
# NOTE(review): the return statement (listing line 135) is not visible.
130 def mathml_latex(context, trees):
# Imported inside the function rather than at module level.
131 from librarian.embeds.mathml import MathML
132 text = MathML(trees[0]).to_latex().data
133 # Remove invisible multiplications, they produce unwanted spaces.
# U+2062 is the Unicode INVISIBLE TIMES character.
134 text = text.replace(u'\u2062', '')
# Registers the module-level `mathml_latex` function through
# `_register_function`.
138 def reg_mathml_latex():
139 _register_function(mathml_latex)
# Defines and registers a `mathml` extension function that renders MathML to
# a PNG image for EPUB output.
# NOTE(review): listing lines 144, 148-150, 152 and 154-159 are not visible:
# the call that receives `uid=` / `media_type=` (presumably an ebooklib epub
# item added to `output` - confirm), the initialisation and increment of
# `mathml.count`, and the return value.
142 def reg_mathml_epub(output):
143 from librarian.embeds.mathml import MathML
145 def mathml(context, trees):
# Only the first element of `trees` is rendered: MathML -> LaTeX -> PNG data.
146 data = MathML(trees[0]).to_latex().to_png().data
# `mathml.count` is an attribute on the function object, used to build the
# file name here and the uid below.
147 name = "math%d.png" % mathml.count
151 uid='math%d' % mathml.count,
153 media_type='image/png',
160 _register_function(mathml)