1 # -*- coding: utf-8 -*-
9 from librarian import dcparser
# Table of (entity, substitution) pairs. substitute_entities() walks this
# list in order and replaces every occurrence of each entity in the text
# with its substitution, so the order of the entries matters when one
# entity is a prefix of another.
# NOTE(review): the entries themselves are not visible in this excerpt.
12 ENTITY_SUBSTITUTIONS = [
# XPath extension function; it is bound to the name 'substitute_entities'
# in an lxml function namespace further down in this file.
#
# Parameters:
#   context -- first positional argument; not used by the visible body.
#   text    -- the value to convert; it may arrive as a list, which is
#              special-cased by the isinstance() check below.
#
# NOTE(review): the body of the `if` and the function's return statement
# are not visible in this excerpt -- presumably the list is collapsed into
# a single string and the converted text is returned; confirm.
21 def substitute_entities(context, text):
22 """XPath extension function converting all entites in passed text."""
# A list value needs separate handling before .replace() can be called on it.
23 if isinstance(text, list):
# Apply every (entity, substitution) pair in table order; each replacement
# operates on the result of the previous one.
25 for entity, substitutution in ENTITY_SUBSTITUTIONS:
26 text = text.replace(entity, substitutution)
30 # Register substitute_entities function with lxml
# The function namespace is keyed by the URI below; assigning into it binds
# the Python function to the name 'substitute_entities' within that
# namespace.
# NOTE(review): presumably the book2txt.xslt stylesheet calls the function
# through a prefix mapped to this URI -- confirm against the stylesheet.
31 ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
32 ns['substitute_entities'] = substitute_entities
# Convert the XML document in input_filename to text in output_filename.
#
# Visible steps:
#   1. Parse the 'book2txt.xslt' stylesheet located next to this module.
#   2. Copy the input into an in-memory buffer, replacing every match of
#      the pattern /\s with '<br/>\n'.
#   3. Parse the buffer with lxml (blank text removed) and apply the
#      stylesheet.
#   4. %-format the transformed text with dcparser.parse(input_filename).url
#      and write it to output_filename encoded as UTF-8.
#
# Parameters:
#   input_filename  -- path of the XML file to read.
#   output_filename -- path of the text file to write.
#
# NOTE(review): cStringIO, str.decode() and unicode() are Python 2 APIs.
# NOTE(review): the loop header that yields `line`, any rewind of doc_file
# before parsing, and any close() calls are not visible in this excerpt.
35 def transform(input_filename, output_filename):
36 """Transforms file input_filename in XML to output_filename in TXT."""
# The stylesheet path is resolved relative to this module's directory, not
# the current working directory.
38 style_filename = os.path.join(os.path.dirname(__file__), 'book2txt.xslt')
39 style = etree.parse(style_filename)
# In-memory buffer that receives the preprocessed document.
41 doc_file = cStringIO.StringIO()
# Matches a slash followed by one whitespace character. re.MULTILINE only
# affects ^ and $, which this pattern does not use; the trailing semicolon
# is redundant.
42 expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
# NOTE(review): no matching close() for f is visible here -- confirm.
44 f = open(input_filename, 'r')
# Decode to unicode for the substitution, then re-encode to UTF-8 bytes
# before writing into the buffer.
46 line = line.decode('utf-8')
47 line = expr.sub(u'<br/>\n', line)
48 doc_file.write(line.encode('utf-8'))
# remove_blank_text=True is passed so the parser drops blank text nodes.
53 parser = etree.XMLParser(remove_blank_text=True)
54 doc = etree.parse(doc_file, parser)
56 result = doc.xslt(style)
57 output_file = codecs.open(output_filename, 'wb', encoding='utf-8')
# The transformed text is used as a %-format template with the document URL
# as its single argument; any other '%' in the text would be interpreted as
# a format directive.
# NOTE(review): no close() for output_file is visible here -- confirm.
58 output_file.write(unicode(result) % dcparser.parse(input_filename).url)