# Reconstructed from a whitespace-collapsed diff of bin/book2html.py.
# The same diff also changes transform(), which is only partly visible in
# this chunk: the changed line there no longer chains
# .replace(u'---', u'—').replace(u',,', u'„') after expr.sub(...).

# Pairs of (plain-text sequence, typographic replacement).
# Order matters: the pairs are applied from top to bottom, so u'---' has to
# be listed before u'--'; otherwise every u'---' would already have been
# consumed by the u'--' rule and no em dash would ever be produced.
ENTITY_SUBSTITUTIONS = [
    (u'---', u'—'),
    (u'--', u'–'),
    (u'...', u'…'),
    (u',,', u'„'),
    (u'"', u'”'),
]


def substitute_entities(context, text):
    """XPath extension function converting all entities in the passed text.

    Arguments:
        context: first positional argument of the call; it is not used here.
            (Presumably the evaluation context lxml hands to extension
            functions -- see the lxml extension-function documentation.)
        text: either a single string or a list of strings. A list is
            concatenated into one string before substitution.

    Returns:
        The text with every sequence listed in ENTITY_SUBSTITUTIONS replaced
        by its counterpart, applied in table order.
    """
    if isinstance(text, list):
        text = u''.join(text)
    for entity, substitution in ENTITY_SUBSTITUTIONS:
        text = text.replace(entity, substitution)
    return text


# Register substitute_entities function with lxml
ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
ns['substitute_entities'] = substitute_entities