2 # -*- coding: utf-8 -*-
4 # Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska
6 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
7 # For full license text see COPYING or <http://www.gnu.org/licenses/agpl.html>
10 from __future__ import with_statement
16 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
18 from StringIO import StringIO
19 from lxml import etree
# NOTE(review): if these pairs are applied one after another in the order
# listed, u'---' has to stay ahead of u'--' so the longer run wins -- confirm
# against the code that consumes this table.
23 (u'---', u'\u2014'), # three hyphens -> em dash (U+2014)
24 (u'--', u'\u2013'), # two hyphens -> en dash (U+2013)
25 (u'...', u'\u2026'), # three dots -> horizontal ellipsis (U+2026)
26 (u',,', u'\u201E'), # two commas -> double low-9 quotation mark (U+201E)
27 (u'"', u'\u201D'), # straight double quote -> right double quotation mark (U+201D)
# Pattern for text that opens with optional whitespace, three hyphens and one
# whitespace character; group 1 captures everything after that up to the next
# newline ('.' does not match a newline here).
30 DIALOG_EXPR = re.compile(r"\s*---\s(.*)")
32 def wl_normalize_text(context, text):
33 """XPath extension function converting all entities in passed text."""
# The argument may arrive as a list rather than a single string, so that
# case is checked for before any replacement is attempted.
34 if isinstance(text, list):
# Apply every (code, ucode) pair from REPLACEMENTS in table order, replacing
# each occurrence of `code` with `ucode`; later pairs see the output of
# earlier ones.
37 for code, ucode in REPLACEMENTS:
38 text = text.replace(code, ucode)
42 def wl_fix_dialog(context, data):
# Takes the same (context, value) arguments as wl_normalize_text; `data` may
# arrive as a list, which is checked for first.
44 if isinstance(data, list):
# Match from the start of the text against DIALOG_EXPR (optional whitespace,
# '---', one whitespace character); `m` is None when the text does not open
# that way.
49 m = DIALOG_EXPR.match(text)
def filter_verse_ends(data):
    """Return *data* with each verse-end marker turned into an HTML line break.

    A verse-end marker is a slash immediately followed by a newline; every
    such pair is replaced by ``<br />``. Text without the marker is returned
    unchanged.
    """
    verse_end_marker = '/\n'
    html_line_break = '<br />'
    converted = data.replace(verse_end_marker, html_line_break)
    return converted
# Expose the two helpers to XPath/XSLT as extension functions in the
# http://wolnelektury.pl/functions namespace.
60 ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
61 ns['normalize-text'] = wl_normalize_text  # registered under the name normalize-text
62 ns['fix-dialog-line'] = wl_fix_dialog  # registered under the name fix-dialog-line
def normalize_stylesheet():
    """Build an XSLT transformer from librarian's ``normalize.xslt``.

    The stylesheet is looked up in the ``xslt`` directory that sits next to
    the ``librarian`` package's ``__file__``, parsed, and wrapped in
    ``etree.XSLT``. A new transformer is created on every call.
    """
    package_dir = os.path.dirname(librarian.__file__)
    stylesheet_path = os.path.join(package_dir, 'xslt', 'normalize.xslt')
    stylesheet_doc = etree.parse(stylesheet_path)
    return etree.XSLT(stylesheet_doc)
if __name__ == '__main__':
    # Compile the stylesheet once and keep it under the one name that both
    # the transformation call and the error-log loop below refer to.
    trans = normalize_stylesheet()

    # NOTE(review): `input` is not assigned anywhere in this block. Unless it
    # is bound to a file name or file object before this line, it resolves to
    # the builtin `input` function and etree.parse() will fail -- confirm.
    doc = trans(etree.parse(input))

    # Serialize the result to a unicode string, then emit it on stdout as
    # UTF-8 bytes followed by a newline.
    sys.stdout.write(
        etree.tostring(doc, pretty_print=True, encoding=unicode).encode('utf-8') + '\n')

    # Forward every entry of the transformer's error log to stderr, one per
    # line, encoded as UTF-8.
    for err in trans.error_log:
        sys.stderr.write((u"%s\n" % err).encode('utf-8'))