f66117d5da082f1d829d476128972168f92640c3
[wolnelektury.git] / bin / book2html.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 import cStringIO
4 import re
5 import optparse
6 import os
7 import sys
8
9 from lxml import etree
10
11
def transform(input_filename, output_filename):
    """Transform the XML file input_filename into XHTML in output_filename.

    The source is pre-processed line by line before it is parsed: a slash
    followed by a whitespace character becomes a ``<br/>`` line break,
    ``---`` becomes an em dash and ``,,`` becomes a low opening quotation
    mark.  The parsed document is then run through the ``book2html.xslt``
    stylesheet located in the same directory as this script, and the result
    is written pretty-printed as UTF-8 with an XML declaration.

    input_filename -- path of the UTF-8 encoded XML source
    output_filename -- path the XHTML result is written to
    """
    # Parse XSLT; the stylesheet is looked up next to this script.
    style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
    style = etree.parse(style_filename)

    # The pre-processed source is collected in memory and parsed from there.
    doc_file = cStringIO.StringIO()
    expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE)

    f = open(input_filename, 'r')
    try:
        for line in f:
            # Work on unicode text so the typographic replacements are
            # character-based, then store the line back as UTF-8 bytes.
            line = line.decode('utf-8')
            line = expr.sub(u'<br/>\n', line).replace(u'---', u'—').replace(u',,', u'„')
            doc_file.write(line.encode('utf-8'))
    finally:
        # Close the input even when a line fails to decode.
        f.close()

    # Rewind so the parser reads the buffer from the beginning.
    doc_file.seek(0)

    parser = etree.XMLParser(remove_blank_text=True)
    doc = etree.parse(doc_file, parser)

    result = doc.xslt(style)
    result.write(output_filename, xml_declaration=True, pretty_print=True, encoding='utf-8')
35
36
if __name__ == '__main__':
    # Parse commandline arguments
    usage = """Usage: %prog [options] SOURCE [SOURCE...]
    Convert SOURCE files to HTML format."""

    parser = optparse.OptionParser(usage=usage)

    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
        help='print status messages to stdout')

    options, input_filenames = parser.parse_args()

    # At least one SOURCE file is required; otherwise show the help text and
    # exit with a failure status.
    if not input_filenames:
        parser.print_help()
        # sys.exit() rather than the bare exit() helper: the latter is only
        # injected by the site module and is not always available.
        sys.exit(1)

    # Do some real work: each SOURCE is converted to a sibling file with the
    # same base name and an '.html' extension.
    for input_filename in input_filenames:
        if options.verbose:
            print(input_filename)

        output_filename = os.path.splitext(input_filename)[0] + '.html'
        transform(input_filename, output_filename)
60