2 # -*- coding: utf-8 -*-
4 # This file is part of Librarian.
6 # Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska <fundacja@nowoczesnapolska.org.pl>
8 # For full list of contributors see AUTHORS file.
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU Affero General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU Affero General Public License for more details.
20 # You should have received a copy of the GNU Affero General Public License
21 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 from __future__ import with_statement
30 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
32 from StringIO import StringIO
33 from lxml import etree
37 (u'---', u'\u2014'), # mdash
38 (u'--', u'\u2013'), # ndash
39 (u'...', u'\u2026'), # ldots
40 (u',,', u'\u201E'), # lower double back-quote
41 (u'"', u'\u201D'), # upper double quote
# Matches text that begins with optional whitespace, three hyphens and one
# whitespace character; group 1 captures what follows, up to the next newline.
# Applied with .match() in wl_fix_dialog, so it is anchored at the start.
DIALOG_EXPR = re.compile(r"\s*---\s(.*)")
46 def wl_normalize_text(context, text):
47 """XPath extension function converting all entites in passed text."""
48 if isinstance(text, list):
51 for code, ucode in REPLACEMENTS:
52 text = text.replace(code, ucode)
56 def wl_fix_dialog(context, data):
58 if isinstance(data, list):
63 m = DIALOG_EXPR.match(text)
def filter_verse_ends(data):
    """Replace every slash-plus-newline sequence in ``data`` with ``<br />``.

    All other text, including newlines that are not preceded by a slash,
    is returned unchanged.
    """
    # Cut the text at each '/\n' marker, then glue the pieces back together
    # with the HTML line break in place of the marker.
    pieces = data.split('/\n')
    return '<br />'.join(pieces)
# Register the two helpers as XPath extension functions in the
# http://wolnelektury.pl/functions namespace, under the names
# 'normalize-text' and 'fix-dialog-line'.
# NOTE(review): presumably normalize.xslt calls them by these names -- confirm.
ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
ns['normalize-text'] = wl_normalize_text
ns['fix-dialog-line'] = wl_fix_dialog
def normalize_stylesheet():
    """Parse ``normalize.xslt`` and return it as an ``etree.XSLT`` transform.

    The stylesheet is read from the ``xslt`` directory located beside the
    ``librarian`` package's own file.
    """
    package_dir = os.path.dirname(librarian.__file__)
    stylesheet_path = os.path.join(package_dir, 'xslt', 'normalize.xslt')
    stylesheet_doc = etree.parse(stylesheet_path)
    return etree.XSLT(stylesheet_doc)
if __name__ == '__main__':
    # Script entry point: run the normalize stylesheet over a document and
    # print the transformed tree as UTF-8.

    # Bind the transform to ``trans`` -- the name every later line uses.
    # Binding it to any other name makes the call below raise NameError.
    trans = normalize_stylesheet()

    # NOTE(review): ``input`` is not assigned in the visible part of this
    # block -- presumably it is bound to the source document on a line not
    # shown here; confirm before relying on it.
    doc = trans(etree.parse(input))

    # Parenthesised so the line parses as either a print statement or a
    # print call; with a single argument the output is the same.
    print(etree.tostring(doc, pretty_print=True, encoding=unicode).encode('utf-8'))

    # Echo everything the transform logged to stderr, UTF-8 encoded.
    for err in trans.error_log:
        sys.stderr.write((u"%s\n" % err).encode('utf-8'))