2 # -*- coding: utf-8 -*-
12 ENTITY_SUBSTITUTIONS = [
def substitute_entities(context, text):
    """XPath extension function converting all entities in the passed text.

    Each ``(entity, substitution)`` pair of ``ENTITY_SUBSTITUTIONS`` is
    applied in list order with ``replace``, so earlier pairs take
    precedence over later ones.

    Args:
        context: first argument lxml passes to an XPath extension function;
            not used here.
        text: the string to convert, or a list (presumably of string
            fragments -- confirm against the stylesheet's calls).

    Returns:
        The text with every listed entity replaced.
    """
    if isinstance(text, list):
        # NOTE(review): the body of this branch is not visible in the
        # reviewed excerpt; concatenating the fragments into one string is
        # assumed, since the loop below needs an object with .replace().
        text = ''.join(text)
    for entity, substitution in ENTITY_SUBSTITUTIONS:
        text = text.replace(entity, substitution)
    # NOTE(review): the return statement is not visible in the excerpt
    # either; an extension function has to hand its result back to XPath.
    return text
# Register substitute_entities with lxml as an extension function in the
# 'http://wolnelektury.pl/functions' namespace, under its own name.
ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
ns['substitute_entities'] = substitute_entities
def transform(input_filename, output_filename):
    """Transform the XML file input_filename into the XHTML file output_filename.

    The input is preprocessed line by line: every '/' that is followed by
    a whitespace character is replaced with '<br/>' plus a newline.  The
    preprocessed text is parsed with blank text removed, run through the
    'book2html.xslt' stylesheet located next to this module, and written
    out pretty-printed as UTF-8 with an XML declaration.

    Args:
        input_filename: path of the UTF-8 encoded XML source.
        output_filename: path the result is written to.
    """
    # The stylesheet lives in the same directory as this module.
    style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
    style = etree.parse(style_filename)

    doc_file = cStringIO.StringIO()
    expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE)

    f = open(input_filename, 'r')
    try:
        # NOTE(review): the loop header is not visible in the reviewed
        # excerpt; iterating over the file line by line is assumed.
        for line in f:
            # Substitute on unicode text, then store UTF-8 bytes again.
            line = line.decode('utf-8')
            line = expr.sub(u'<br/>\n', line)
            doc_file.write(line.encode('utf-8'))
    finally:
        # Release the handle even when decoding or substitution fails.
        f.close()

    # NOTE(review): not visible in the excerpt; the buffer has to be rewound
    # so the parser reads it from the start rather than from the write position.
    doc_file.seek(0)

    parser = etree.XMLParser(remove_blank_text=True)
    doc = etree.parse(doc_file, parser)

    result = doc.xslt(style)
    result.write(output_filename, xml_declaration=True, pretty_print=True, encoding='utf-8')
if __name__ == '__main__':
    # Parse command-line arguments
    usage = """Usage: %prog [options] SOURCE [SOURCE...]
Convert SOURCE files to HTML format."""

    parser = optparse.OptionParser(usage=usage)

    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
        help='print status messages to stdout')

    options, input_filenames = parser.parse_args()

    if not input_filenames:
        # NOTE(review): the body of this check is not visible in the reviewed
        # excerpt; showing the help text and exiting with a non-zero status
        # is assumed -- confirm the exact exit code.
        parser.print_help()
        raise SystemExit(1)

    # Convert every SOURCE; the output goes next to it with an .html extension.
    for input_filename in input_filenames:
        # NOTE(review): the verbose branch is not visible in the excerpt;
        # echoing the file being processed is assumed from the option's help.
        if options.verbose:
            print(input_filename)

        output_filename = os.path.splitext(input_filename)[0] + '.html'
        transform(input_filename, output_filename)