X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/239d66922f4b83ee5baaa284a9c33a32bfcb99a4..fdab956ee44a8ba9fe306e37a959aa85aa27cbdd:/bin/book2html.py
diff --git a/bin/book2html.py b/bin/book2html.py
index 5bd2bb527..3907de38c 100755
--- a/bin/book2html.py
+++ b/bin/book2html.py
@@ -9,39 +9,42 @@ import sys
from lxml import etree
-# Parse args
-usage = """Usage: %prog [options] SOURCE [SOURCE...]
-Convert SOURCE files to HTML format."""
-
-parser = optparse.OptionParser(usage=usage)
-
-parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
- help='print status messages to stdout')
-
-options, input_filenames = parser.parse_args()
-
-if len(input_filenames) < 1:
- parser.print_help()
- exit(1)
-
-# Parse XSLT
-style = etree.parse('book2html.xslt')
-
-# Do some real work
-for input_filename in input_filenames:
- if options.verbose:
- print input_filename
-
- output_filename = os.path.splitext(input_filename)[0] + '.html'
-
- # Transform
+ENTITY_SUBSTITUTIONS = [
+ (u'---', u'—'),
+ (u'--', u'–'),
+ (u'...', u'…'),
+ (u',,', u'„'),
+ (u'"', u'”'),
+]
+
+
+def substitute_entities(context, text):
+ """XPath extension function converting all entities in passed text."""
+ if isinstance(text, list):
+ text = ''.join(text)
+ for entity, substitutution in ENTITY_SUBSTITUTIONS:
+ text = text.replace(entity, substitutution)
+ return text
+
+
+# Register substitute_entities function with lxml
+ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+ns['substitute_entities'] = substitute_entities
+
+
+def transform(input_filename, output_filename):
+ """Transforms file input_filename in XML to output_filename in XHTML."""
+ # Parse XSLT
+ style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt')
+ style = etree.parse(style_filename)
+
doc_file = cStringIO.StringIO()
expr = re.compile(r'/\s', re.MULTILINE | re.UNICODE);
f = open(input_filename, 'r')
for line in f:
line = line.decode('utf-8')
- line = expr.sub(u'<br/>\n', line).replace(u'---', u'—').replace(u',,', u'„')
+ line = expr.sub(u'<br/>\n', line)
doc_file.write(line.encode('utf-8'))
f.close()
@@ -53,3 +56,28 @@ for input_filename in input_filenames:
result = doc.xslt(style)
result.write(output_filename, xml_declaration=True, pretty_print=True, encoding='utf-8')
+
+if __name__ == '__main__':
+ # Parse commandline arguments
+ usage = """Usage: %prog [options] SOURCE [SOURCE...]
+ Convert SOURCE files to HTML format."""
+
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
+ help='print status messages to stdout')
+
+ options, input_filenames = parser.parse_args()
+
+ if len(input_filenames) < 1:
+ parser.print_help()
+ exit(1)
+
+ # Do some real work
+ for input_filename in input_filenames:
+ if options.verbose:
+ print input_filename
+
+ output_filename = os.path.splitext(input_filename)[0] + '.html'
+ transform(input_filename, output_filename)
+