X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/17a9ed3b7ef12e0786ddf46bf8a52b1087224762..482477cced586463d7f342cdde8482a4d97d7685:/librarian/epub.py diff --git a/librarian/epub.py b/librarian/epub.py index bbeb3d7..01f5c92 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -7,6 +7,7 @@ from __future__ import with_statement import os import os.path +import re import subprocess from StringIO import StringIO from copy import deepcopy @@ -15,8 +16,7 @@ import zipfile from tempfile import mkdtemp, NamedTemporaryFile from shutil import rmtree -from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, OutputFile -from librarian.cover import WLCover +from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, IOFile from librarian import functions, get_resource @@ -33,6 +33,7 @@ def inner_xml(node): nt = node.text if node.text is not None else '' return ''.join([nt] + [etree.tostring(child) for child in node]) + def set_inner_xml(node, text): """ sets node's text and children from a string @@ -109,31 +110,74 @@ def find_annotations(annotations, source, part_no): find_annotations(annotations, child, part_no) +class Stanza(object): + """ + Converts / verse endings into verse elements in a stanza. + + Slashes may only occur directly in the stanza. Any slashes in subelements + will be ignored, and the subelements will be put inside verse elements. + + >>> s = etree.fromstring("a/\\nbx/\\nyc/ \\nd") + >>> Stanza(s).versify() + >>> print etree.tostring(s) + abx/ + ycd + + """ + def __init__(self, stanza_elem): + self.stanza = stanza_elem + self.verses = [] + self.open_verse = None + + def versify(self): + self.push_text(self.stanza.text) + for elem in self.stanza: + self.push_elem(elem) + self.push_text(elem.tail) + tail = self.stanza.tail + self.stanza.clear() + self.stanza.tail = tail + self.stanza.extend(self.verses) + + def open_normal_verse(self): + self.open_verse = self.stanza.makeelement("wers_normalny") + self.verses.append(self.open_verse) + + def get_open_verse(self): + if self.open_verse is None: + self.open_normal_verse() + return self.open_verse + + def push_text(self, text): + if not text or not text.strip(): + return + for i, verse_text in enumerate(re.split(r"/\s*\n", text)): + if i: + self.open_normal_verse() + verse = self.get_open_verse() + if len(verse): + verse[-1].tail = (verse[-1].tail or "") + verse_text.strip() + else: + verse.text = (verse.text or "") + verse_text.strip() + + def push_elem(self, elem): + if elem.tag.startswith("wers"): + verse = deepcopy(elem) + verse.tail = None + self.verses.append(verse) + self.open_verse = verse + else: + appended = deepcopy(elem) + appended.tail = None + self.get_open_verse().append(appended) + + def replace_by_verse(tree): """ Find stanzas and create new verses in place of a '/' character """ stanzas = tree.findall('.//' + WLNS('strofa')) - for node in stanzas: - for child_node in node: - if child_node.tag in ('slowo_obce', 'wyroznienie'): - foreign_verses = inner_xml(child_node).split('/\n') - if len(foreign_verses) > 1: - new_foreign = '' - for foreign_verse in foreign_verses: - if foreign_verse.startswith('', foreign_verse, '')) - set_inner_xml(child_node, new_foreign) - verses = inner_xml(node).split('/\n') - if len(verses) > 1: - modified_inner_xml = '' - for verse in verses: - if verse.startswith('', verse, '')) - set_inner_xml(node, modified_inner_xml) + for stanza in stanzas: + Stanza(stanza).versify() def add_to_manifest(manifest, partno): @@ -151,7 +195,7 @@ def add_to_manifest(manifest, partno): def add_to_spine(spine, partno): """ Adds a node to the spine section in content.opf file """ - e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno}); + e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno}) spine.append(e) @@ -243,7 +287,7 @@ def chop(main_text): # prepare a container for each chunk part_xml = etree.Element('utwor') etree.SubElement(part_xml, 'master') - main_xml_part = part_xml[0] # master + main_xml_part = part_xml[0] # master last_node_part = False for one_part in main_text: @@ -261,8 +305,10 @@ def chop(main_text): yield part_xml -def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]): +def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=None): """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """ + if _empty_html_static is None: + _empty_html_static = [] toc = TOC() for element in chunk_xml[0]: @@ -308,8 +354,7 @@ def transform(wldoc, verbose=False, # write book title page html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl')) chars = used_chars(html_tree.getroot()) - zip.writestr('OPS/title.html', - etree.tostring(html_tree, method="html", pretty_print=True)) + zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True)) # add a title page TOC entry toc.add(u"Strona tytułowa", "title.html") elif wldoc.book_info.parts: @@ -352,15 +397,14 @@ def transform(wldoc, verbose=False, add_to_spine(spine, chunk_counter) chunk_counter += 1 - for child in wldoc.parts(): - child_toc, chunk_counter, chunk_chars, sample = transform_file( - child, chunk_counter, first=False, sample=sample) - toc.append(child_toc) - chars = chars.union(chunk_chars) + # for child in wldoc.parts(): + # child_toc, chunk_counter, chunk_chars, sample = transform_file( + # child, chunk_counter, first=False, sample=sample) + # toc.append(child_toc) + # chars = chars.union(chunk_chars) return toc, chunk_counter, chars, sample - document = deepcopy(wldoc) del wldoc @@ -369,8 +413,8 @@ def transform(wldoc, verbose=False, document.edoc.getroot().set(flag, 'yes') # add editors info - document.edoc.getroot().set('editors', u', '.join(sorted( - editor.readable() for editor in document.editors()))) + # document.edoc.getroot().set('editors', u', '.join(sorted( + # editor.readable() for editor in document.editors()))) opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) @@ -386,11 +430,12 @@ def transform(wldoc, verbose=False, mime.compress_type = zipfile.ZIP_STORED mime.extra = '' zip.writestr(mime, 'application/epub+zip') - zip.writestr('META-INF/container.xml', '' \ - '' \ - '') + zip.writestr( + 'META-INF/container.xml', '' + '' + '') zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png')) zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png')) if not style: @@ -398,9 +443,6 @@ def transform(wldoc, verbose=False, zip.write(style, os.path.join('OPS', 'style.css')) if cover: - if cover is True: - cover = WLCover - cover_file = StringIO() bound_cover = cover(document.book_info) bound_cover.save(cover_file) @@ -427,14 +469,14 @@ def transform(wldoc, verbose=False, opf.getroot()[0].append(etree.fromstring('')) guide.append(etree.fromstring('')) - annotations = etree.Element('annotations') - toc_file = etree.fromstring('' \ - '' \ - '') + toc_file = etree.fromstring( + '' + '' + '') nav_map = toc_file[-1] if html_toc: @@ -472,7 +514,7 @@ def transform(wldoc, verbose=False, zip.writestr('OPS/last.html', etree.tostring( html_tree, method="html", pretty_print=True)) - if not flags or not 'without-fonts' in flags: + if not flags or 'without-fonts' not in flags: # strip fonts tmpdir = mkdtemp('-librarian-epub') try: @@ -516,4 +558,4 @@ def transform(wldoc, verbose=False, zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True)) zip.close() - return OutputFile.from_filename(output_file.name) + return IOFile.from_filename(output_file.name)