X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/239d66922f4b83ee5baaa284a9c33a32bfcb99a4..fdab956ee44a8ba9fe306e37a959aa85aa27cbdd:/bin/book2html.py diff --git a/bin/book2html.py b/bin/book2html.py index 5bd2bb527..3907de38c 100755 --- a/bin/book2html.py +++ b/bin/book2html.py @@ -9,39 +9,42 @@ import sys from lxml import etree -# Parse args -usage = """Usage: %prog [options] SOURCE [SOURCE...] -Convert SOURCE files to HTML format.""" - -parser = optparse.OptionParser(usage=usage) - -parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print status messages to stdout') - -options, input_filenames = parser.parse_args() - -if len(input_filenames) < 1: - parser.print_help() - exit(1) - -# Parse XSLT -style = etree.parse('book2html.xslt') - -# Do some real work -for input_filename in input_filenames: - if options.verbose: - print input_filename - - output_filename = os.path.splitext(input_filename)[0] + '.html' - - # Transform +ENTITY_SUBSTITUTIONS = [ + (u'---', u'—'), + (u'--', u'–'), + (u'...', u'…'), + (u',,', u'„'), + (u'"', u'”'), +] + + +def substitute_entities(context, text): + """XPath extension function converting all entites in passed text.""" + if isinstance(text, list): + text = ''.join(text) + for entity, substitutution in ENTITY_SUBSTITUTIONS: + text = text.replace(entity, substitutution) + return text + + +# Register substitute_entities function with lxml +ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') +ns['substitute_entities'] = substitute_entities + + +def transform(input_filename, output_filename): + """Transforms file input_filename in XML to output_filename in XHTML.""" + # Parse XSLT + style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt') + style = etree.parse(style_filename) + doc_file = cStringIO.StringIO() expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE); f = open(input_filename, 'r') for line in f: line = line.decode('utf-8') - line = expr.sub(u'
\n', line).replace(u'---', u'—').replace(u',,', u'„') + line = expr.sub(u'
\n', line) doc_file.write(line.encode('utf-8')) f.close() @@ -53,3 +56,28 @@ for input_filename in input_filenames: result = doc.xslt(style) result.write(output_filename, xml_declaration=True, pretty_print=True, encoding='utf-8') + +if __name__ == '__main__': + # Parse commandline arguments + usage = """Usage: %prog [options] SOURCE [SOURCE...] + Convert SOURCE files to HTML format.""" + + parser = optparse.OptionParser(usage=usage) + + parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, + help='print status messages to stdout') + + options, input_filenames = parser.parse_args() + + if len(input_filenames) < 1: + parser.print_help() + exit(1) + + # Do some real work + for input_filename in input_filenames: + if options.verbose: + print input_filename + + output_filename = os.path.splitext(input_filename)[0] + '.html' + transform(input_filename, output_filename) +