X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/5400caf9e791572a0800ab69cc5af4044f98268e..fdab956ee44a8ba9fe306e37a959aa85aa27cbdd:/bin/book2html.py?ds=sidebyside diff --git a/bin/book2html.py b/bin/book2html.py index e68435974..3907de38c 100755 --- a/bin/book2html.py +++ b/bin/book2html.py @@ -9,10 +9,34 @@ import sys from lxml import etree +ENTITY_SUBSTITUTIONS = [ + (u'---', u'—'), + (u'--', u'–'), + (u'...', u'…'), + (u',,', u'„'), + (u'"', u'”'), +] + + +def substitute_entities(context, text): + """XPath extension function converting all entites in passed text.""" + if isinstance(text, list): + text = ''.join(text) + for entity, substitutution in ENTITY_SUBSTITUTIONS: + text = text.replace(entity, substitutution) + return text + + +# Register substitute_entities function with lxml +ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') +ns['substitute_entities'] = substitute_entities + + def transform(input_filename, output_filename): """Transforms file input_filename in XML to output_filename in XHTML.""" # Parse XSLT - style = etree.parse('book2html.xslt') + style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt') + style = etree.parse(style_filename) doc_file = cStringIO.StringIO() expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE); @@ -20,7 +44,7 @@ def transform(input_filename, output_filename): f = open(input_filename, 'r') for line in f: line = line.decode('utf-8') - line = expr.sub(u'
\n', line).replace(u'---', u'—').replace(u',,', u'„') + line = expr.sub(u'
\n', line) doc_file.write(line.encode('utf-8')) f.close()