X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/6642c1c71c5c6ce6ef3401c8c9da84cf076b018b..861348cca153705d844f9e50358a2076ef8be295:/librarian/html.py diff --git a/librarian/html.py b/librarian/html.py index 4edbf33..7733fee 100644 --- a/librarian/html.py +++ b/librarian/html.py @@ -1,4 +1,24 @@ # -*- coding: utf-8 -*- +# +# This file is part of Librarian. +# +# Copyright © 2008,2009,2010 Fundacja Nowoczesna Polska +# +# For full list of contributors see AUTHORS file. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# import os import cStringIO import re @@ -6,7 +26,9 @@ import copy from lxml import etree from librarian.parser import WLDocument +from librarian import XHTMLNS, ParseError +from lxml.etree import XMLSyntaxError, XSLTApplyError ENTITY_SUBSTITUTIONS = [ (u'---', u'—'), @@ -16,6 +38,14 @@ ENTITY_SUBSTITUTIONS = [ (u'"', u'”'), ] +STYLESHEETS = { + 'legacy': 'xslt/book2html.xslt', + 'full': 'xslt/wl2html_full.xslt', + 'partial': 'xslt/wl2html_partial.xslt' +} + +def get_stylesheet(name): + return os.path.join(os.path.dirname(__file__), STYLESHEETS[name]) def substitute_entities(context, text): """XPath extension function converting all entites in passed text.""" @@ -25,38 +55,43 @@ def substitute_entities(context, text): text = text.replace(entity, substitutution) return text - # Register substitute_entities function with lxml ns = etree.FunctionNamespace('http://wolnelektury.pl/functions') ns['substitute_entities'] = substitute_entities - -def transform(input, output_filename=None, is_file=True): +def transform(input, output_filename=None, is_file=True, \ + parse_dublincore=True, stylesheet='legacy', options={}): """Transforms file input_filename in XML to output_filename in XHTML.""" # Parse XSLT - style_filename = os.path.join(os.path.dirname(__file__), 'book2html.xslt') - style = etree.parse(style_filename) + try: + style_filename = get_stylesheet(stylesheet) + style = etree.parse(style_filename) - if is_file: - document = WLDocument.from_file(input, True) - else: - document = WLDocument.from_string(input, True) - - result = document.transform(style) - del document # no longer needed large object :) + if is_file: + document = WLDocument.from_file(input, True, \ + parse_dublincore=parse_dublincore) + else: + document = WLDocument.from_string(input, True, \ + parse_dublincore=parse_dublincore) - if result.find('//p') is not None: - add_anchors(result.getroot()) - add_table_of_contents(result.getroot()) + result = document.transform(style, **options) + del document 
# no longer needed large object :) - if output_filename is not None: - result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8') + if etree.ETXPath('//p|//{%s}p' % str(XHTMLNS))(result): + add_anchors(result.getroot()) + add_table_of_contents(result.getroot()) + + if output_filename is not None: + result.write(output_filename, xml_declaration=False, pretty_print=True, encoding='utf-8') + else: + return result + return True else: - return result - return True - else: - return False - + return "" + except KeyError: + raise ValueError("'%s' is not a valid stylesheet." % stylesheet) + except (XMLSyntaxError, XSLTApplyError), e: + raise ParseError(e) class Fragment(object): def __init__(self, id, themes):