X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/2258dea3cedb63cc7b0f0bcacec6d863381d42ca..549a9425ac814c97755abf50d449d5ddf2168d17:/librarian/fb2.py diff --git a/librarian/fb2.py b/librarian/fb2.py index b0ad410..6dd1c35 100644 --- a/librarian/fb2.py +++ b/librarian/fb2.py @@ -3,14 +3,41 @@ # This file is part of Librarian, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +from __future__ import unicode_literals + import os.path from copy import deepcopy from lxml import etree +import six from librarian import functions, OutputFile +from .epub import replace_by_verse functions.reg_substitute_entities() +functions.reg_person_name() + + +def sectionify(tree): + """Finds section headers and adds a tree of _section tags.""" + sections = [ + 'naglowek_czesc', + 'naglowek_akt', 'naglowek_rozdzial', 'naglowek_scena', + 'naglowek_podrozdzial'] + section_level = dict((v, k) for (k, v) in enumerate(sections)) + + # We can assume there are just subelements and no text at section level. 
+ for level, section_name in reversed(list(enumerate(sections))): + for header in tree.findall('//' + section_name): + section = header.makeelement("_section") + header.addprevious(section) + section.append(header) + sibling = section.getnext() + while (sibling is not None and + section_level.get(sibling.tag, 1000) > level): + section.append(sibling) + sibling = section.getnext() + def transform(wldoc, verbose=False, cover=None, flags=None): @@ -27,9 +54,17 @@ def transform(wldoc, verbose=False, for flag in flags: document.edoc.getroot().set(flag, 'yes') + document.clean_ed_note() + document.clean_ed_note('abstrakt') + style_filename = os.path.join(os.path.dirname(__file__), 'fb2/fb2.xslt') style = etree.parse(style_filename) + replace_by_verse(document.edoc) + sectionify(document.edoc) + result = document.transform(style) - return OutputFile.from_string(unicode(result).encode('utf-8')) + return OutputFile.from_bytes(six.text_type(result).encode('utf-8')) + +# vim:et