X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/e2bc69b731434e6929686637ba4cf296632138e2..84ec0eba63d0933b3f22a7884c46be6b796ee165:/librarian/epub.py diff --git a/librarian/epub.py b/librarian/epub.py index a615b7e..bb3123d 100644 --- a/librarian/epub.py +++ b/librarian/epub.py @@ -84,6 +84,10 @@ def replace_characters(node): .replace(",,", u"\u201E")\ .replace('"', u"\u201D")\ .replace("'", u"\u2019") + if node.tag in ('uwaga', 'extra'): + t = node.tail + node.clear() + node.tail = t node.text = replace_chars(node.text) node.tail = replace_chars(node.tail) for child in node: @@ -154,19 +158,23 @@ def add_to_spine(spine, partno): class TOC(object): - def __init__(self, name=None, part_number=None): + def __init__(self, name=None, part_href=None): self.children = [] self.name = name - self.part_number = part_number + self.part_href = part_href self.sub_number = None - def add(self, name, part_number, level=0, is_part=True): + def add(self, name, part_href, level=0, is_part=True, index=None): + assert level == 0 or index is None if level > 0 and self.children: - return self.children[-1].add(name, part_number, level-1, is_part) + return self.children[-1].add(name, part_href, level-1, is_part) else: t = TOC(name) - t.part_number = part_number - self.children.append(t) + t.part_href = part_href + if index is not None: + self.children.insert(index, t) + else: + self.children.append(t) if not is_part: t.sub_number = len(self.children) + 1 return t.sub_number @@ -183,7 +191,13 @@ class TOC(object): else: return 0 - def write_to_xml(self, nav_map, counter): + def href(self): + src = self.part_href + if self.sub_number is not None: + src += '#sub%d' % self.sub_number + return src + + def write_to_xml(self, nav_map, counter=1): for child in self.children: nav_point = nav_map.makeelement(NCXNS('navPoint')) nav_point.set('id', 'NavPoint-%d' % counter) @@ -196,15 +210,26 @@ class TOC(object): nav_point.append(nav_label) content = nav_map.makeelement(NCXNS('content')) - src = 'part%d.html' % child.part_number - if child.sub_number is not None: - src += '#sub%d' % child.sub_number - content.set('src', src) + content.set('src', child.href()) nav_point.append(content) nav_map.append(nav_point) counter = child.write_to_xml(nav_point, counter + 1) return counter + def html_part(self, depth=0): + texts = [] + for child in self.children: + texts.append( + "
%s
" % + (depth, child.href(), child.name)) + texts.append(child.html_part(depth+1)) + return "\n".join(texts) + + def html(self): + with open(get_resource('epub/toc.html')) as f: + t = unicode(f.read(), 'utf-8') + return t % self.html_part() + def used_chars(element): """ Lists characters used in an ETree Element """ @@ -244,9 +269,9 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s toc = TOC() for element in chunk_xml[0]: if element.tag in ("naglowek_czesc", "naglowek_rozdzial", "naglowek_akt", "srodtytul"): - toc.add(node_name(element), chunk_no) + toc.add(node_name(element), "part%d.html" % chunk_no) elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'): - subnumber = toc.add(node_name(element), chunk_no, level=1, is_part=False) + subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False) element.set('sub', str(subnumber)) if empty: if not _empty_html_static: @@ -263,6 +288,7 @@ def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_s def transform(provider, slug=None, file_path=None, output_file=None, output_dir=None, make_dir=False, verbose=False, + style=None, html_toc=False, sample=None, cover=None, flags=None): """ produces a EPUB file @@ -273,7 +299,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= make_dir: writes output to //.epub instead of /.epub sample=n: generate sample e-book (with at least n paragraphs) cover: a cover.Cover object - flags: less-advertising, + flags: less-advertising, without-fonts """ def transform_file(input_xml, chunk_counter=1, first=True, sample=None): @@ -285,7 +311,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= # every input file will have a TOC entry, # pointing to starting chunk - toc = TOC(node_name(input_xml.find('.//'+DCNS('title'))), chunk_counter) + toc = TOC(node_name(input_xml.find('.//'+DCNS('title'))), "part%d.html" % chunk_counter) chars = set() if first: # write book title page @@ -293,6 +319,8 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= chars = used_chars(html_tree.getroot()) zip.writestr('OPS/title.html', etree.tostring(html_tree, method="html", pretty_print=True)) + # add a title page TOC entry + toc.add(u"Strona tytułowa", "title.html") elif children: # write title page for every parent if sample is not None and sample <= 0: @@ -391,11 +419,15 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '' \ '') - zip.write(get_resource('epub/style.css'), os.path.join('OPS', 'style.css')) zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png')) + zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png')) + if not style: + style = get_resource('epub/style.css') + zip.write(style, os.path.join('OPS', 'style.css')) opf = xslt(metadata, get_resource('epub/xsltContent.xsl')) manifest = opf.find('.//' + OPFNS('manifest')) + guide = opf.find('.//' + OPFNS('guide')) spine = opf.find('.//' + OPFNS('spine')) if cover: @@ -415,9 +447,9 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '')) manifest.append(etree.fromstring( '' % (c_name, c.mime_type()))) - spine.insert(0, etree.fromstring('')) + spine.insert(0, etree.fromstring('')) opf.getroot()[0].append(etree.fromstring('')) - opf.getroot().append(etree.fromstring('')) + guide.append(etree.fromstring('')) annotations = etree.Element('annotations') @@ -426,23 +458,24 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \ '' \ - '' \ - 'Strona tytułowa' \ - '') + '') nav_map = toc_file[-1] + if html_toc: + manifest.append(etree.fromstring( + '')) + spine.append(etree.fromstring( + '')) + guide.append(etree.fromstring('')) + toc, chunk_counter, chars, sample = transform_file(input_xml, sample=sample) - if not toc.children: - toc.add(u"Początek utworu", 1) - toc_counter = toc.write_to_xml(nav_map, 2) + if len(toc.children) < 2: + toc.add(u"Początek utworu", "part1.html") # Last modifications in container files and EPUB creation if len(annotations) > 0: - nav_map.append(etree.fromstring( - 'Przypisy'\ - '' % {'i': toc_counter})) - toc_counter += 1 + toc.add("Przypisy", "annotations.html") manifest.append(etree.fromstring( '')) spine.append(etree.fromstring( @@ -453,9 +486,7 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= zip.writestr('OPS/annotations.html', etree.tostring( html_tree, method="html", pretty_print=True)) - nav_map.append(etree.fromstring( - 'Strona redakcyjna'\ - '' % {'i': toc_counter})) + toc.add("Strona redakcyjna", "last.html") manifest.append(etree.fromstring( '')) spine.append(etree.fromstring( @@ -465,22 +496,25 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= zip.writestr('OPS/last.html', etree.tostring( html_tree, method="html", pretty_print=True)) - # strip fonts - tmpdir = mkdtemp('-librarian-epub') - cwd = os.getcwd() - - os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) - for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': - optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), - get_resource('fonts/' + fname), os.path.join(tmpdir, fname)] - if verbose: - print "Running font-optimizer" - subprocess.check_call(optimizer_call) - else: - subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) - rmtree(tmpdir) - os.chdir(cwd) + if not flags or not 'without-fonts' in flags: + # strip fonts + tmpdir = mkdtemp('-librarian-epub') + cwd = os.getcwd() + + os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')) + for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf': + optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'), + get_resource('fonts/' + fname), os.path.join(tmpdir, fname)] + if verbose: + print "Running font-optimizer" + subprocess.check_call(optimizer_call) + else: + subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname)) + manifest.append(etree.fromstring( + '' % (fname, fname))) + rmtree(tmpdir) + os.chdir(cwd) zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True)) contents = [] @@ -494,5 +528,11 @@ def transform(provider, slug=None, file_path=None, output_file=None, output_dir= toc_file[0][0].set('content', ''.join((title, 'WolneLektury.pl'))) toc_file[0][1].set('content', str(toc.depth())) set_inner_xml(toc_file[1], ''.join(('', title, ''))) + + # write TOC + if html_toc: + toc.add(u"Spis treści", "toc.html", index=1) + zip.writestr('OPS/toc.html', toc.html().encode('utf-8')) + toc.write_to_xml(nav_map) zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True)) zip.close()