librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from mimetypes import guess_type
  15
  16 from lxml import etree
  17 import zipfile
  18 from tempfile import mkdtemp, NamedTemporaryFile
  19 from shutil import rmtree
  20
  21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  22 from librarian.cover import DefaultEbookCover
  23
  24 from librarian import functions, get_resource
  25
  26 from librarian.hyphenator import Hyphenator
  27
# Import-time registration of helpers from librarian.functions.
# NOTE(review): presumably these register extension functions that the
# stylesheets applied below rely on -- confirm in librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  30
  31
  32 def set_hyph_language(source_tree):
  33     def get_short_lng_code(text):
  34         result = ''
  35         text = ''.join(text)
  36         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  37             for line in f:
  38                 list = line.strip().split('|')
  39                 if list[0] == text:
  40                     result = list[2]
  41         if result == '':
  42             return text
  43         else:
  44             return result
  45     bibl_lng = etree.XPath('//dc:language//text()',
  46                            namespaces={'dc': str(DCNS)})(source_tree)
  47     short_lng = get_short_lng_code(bibl_lng[0])
  48     try:
  49         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  50                                        short_lng + '.dic'))
  51     except:
  52         pass
  53
  54
  55 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  56     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  57     for t in texts:
  58         parent = t.getparent()
  59         if hyph is not None:
  60             newt = ''
  61             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  62             for w in wlist:
  63                 newt += hyph.inserted(w, u'\u00AD')
  64         else:
  65             newt = t
  66         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  67         if t.is_text:
  68             parent.text = newt
  69         elif t.is_tail:
  70             parent.tail = newt
  71
  72
  73 def inner_xml(node):
  74     """ returns node's text and children as a string
  75
  76     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  77     x<b>y</b>z
  78     """
  79
  80     nt = node.text if node.text is not None else ''
  81     return ''.join([nt] + [etree.tostring(child) for child in node])
  82
  83
  84 def set_inner_xml(node, text):
  85     """ sets node's text and children from a string
  86
  87     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  88     >>> set_inner_xml(e, 'x<b>y</b>z')
  89     >>> print etree.tostring(e)
  90     <a>x<b>y</b>z</a>
  91     """
  92
  93     p = etree.fromstring('<x>%s</x>' % text)
  94     node.text = p.text
  95     node[:] = p[:]
  96
  97
  98 def node_name(node):
  99     """ Find out a node's name
 100
 101     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
 102     XYZ
 103     """
 104
 105     tempnode = deepcopy(node)
 106
 107     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 108         for e in tempnode.findall('.//%s' % p):
 109             t = e.tail
 110             e.clear()
 111             e.tail = t
 112     etree.strip_tags(tempnode, '*')
 113     return tempnode.text
 114
 115
 116 def xslt(xml, sheet, **kwargs):
 117     if isinstance(xml, etree._Element):
 118         xml = etree.ElementTree(xml)
 119     with open(sheet) as xsltf:
 120         transform = etree.XSLT(etree.parse(xsltf))
 121         params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
 122         return transform(xml, **params)
 123
 124
 125 def replace_characters(node):
 126     def replace_chars(text):
 127         if text is None:
 128             return None
 129         return text.replace(u"\ufeff", u"")\
 130                    .replace("---", u"\u2014")\
 131                    .replace("--", u"\u2013")\
 132                    .replace(",,", u"\u201E")\
 133                    .replace('"', u"\u201D")\
 134                    .replace("'", u"\u2019")
 135     if node.tag in ('uwaga', 'extra'):
 136         t = node.tail
 137         node.clear()
 138         node.tail = t
 139     node.text = replace_chars(node.text)
 140     node.tail = replace_chars(node.tail)
 141     for child in node:
 142         replace_characters(child)
 143
 144
 145 def find_annotations(annotations, source, part_no):
 146     for child in source:
 147         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 148             annotation = deepcopy(child)
 149             number = str(len(annotations) + 1)
 150             annotation.set('number', number)
 151             annotation.set('part', str(part_no))
 152             annotation.tail = ''
 153             annotations.append(annotation)
 154             tail = child.tail
 155             child.clear()
 156             child.tail = tail
 157             child.text = number
 158         if child.tag not in ('extra', 'uwaga'):
 159             find_annotations(annotations, child, part_no)
 160
 161
 162 class Stanza(object):
 163     """
 164     Converts / verse endings into verse elements in a stanza.
 165
 166     Slashes may only occur directly in the stanza. Any slashes in subelements
 167     will be ignored, and the subelements will be put inside verse elements.
 168
 169     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 170     >>> Stanza(s).versify()
 171     >>> print etree.tostring(s)
 172     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 173     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 174
 175     """
 176     def __init__(self, stanza_elem):
 177         self.stanza = stanza_elem
 178         self.verses = []
 179         self.open_verse = None
 180
 181     def versify(self):
 182         self.push_text(self.stanza.text)
 183         for elem in self.stanza:
 184             self.push_elem(elem)
 185             self.push_text(elem.tail)
 186         tail = self.stanza.tail
 187         self.stanza.clear()
 188         self.stanza.tail = tail
 189         self.stanza.extend(self.verses)
 190
 191     def open_normal_verse(self):
 192         self.open_verse = self.stanza.makeelement("wers_normalny")
 193         self.verses.append(self.open_verse)
 194
 195     def get_open_verse(self):
 196         if self.open_verse is None:
 197             self.open_normal_verse()
 198         return self.open_verse
 199
 200     def push_text(self, text):
 201         if not text:
 202             return
 203         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 204             if i:
 205                 self.open_normal_verse()
 206             verse = self.get_open_verse()
 207             if len(verse):
 208                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 209             else:
 210                 verse.text = (verse.text or "") + verse_text
 211
 212     def push_elem(self, elem):
 213         if elem.tag.startswith("wers"):
 214             verse = deepcopy(elem)
 215             verse.tail = None
 216             self.verses.append(verse)
 217             self.open_verse = verse
 218         else:
 219             appended = deepcopy(elem)
 220             appended.tail = None
 221             self.get_open_verse().append(appended)
 222
 223
 224 def replace_by_verse(tree):
 225     """ Find stanzas and create new verses in place of a '/' character """
 226
 227     stanzas = tree.findall('.//' + WLNS('strofa'))
 228     for stanza in stanzas:
 229         Stanza(stanza).versify()
 230
 231
 232 def add_to_manifest(manifest, partno):
 233     """ Adds a node to the manifest section in content.opf file """
 234
 235     partstr = 'part%d' % partno
 236     e = manifest.makeelement(
 237         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 238                                'media-type': 'application/xhtml+xml'}
 239     )
 240     manifest.append(e)
 241
 242
 243 def add_to_spine(spine, partno):
 244     """ Adds a node to the spine section in content.opf file """
 245
 246     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 247     spine.append(e)
 248
 249
 250 class TOC(object):
 251     def __init__(self, name=None, part_href=None):
 252         self.children = []
 253         self.name = name
 254         self.part_href = part_href
 255         self.sub_number = None
 256
 257     def add(self, name, part_href, level=0, is_part=True, index=None):
 258         assert level == 0 or index is None
 259         if level > 0 and self.children:
 260             return self.children[-1].add(name, part_href, level - 1, is_part)
 261         else:
 262             t = TOC(name)
 263             t.part_href = part_href
 264             if index is not None:
 265                 self.children.insert(index, t)
 266             else:
 267                 self.children.append(t)
 268             if not is_part:
 269                 t.sub_number = len(self.children) + 1
 270                 return t.sub_number
 271
 272     def append(self, toc):
 273         self.children.append(toc)
 274
 275     def extend(self, toc):
 276         self.children.extend(toc.children)
 277
 278     def depth(self):
 279         if self.children:
 280             return max((c.depth() for c in self.children)) + 1
 281         else:
 282             return 0
 283
 284     def href(self):
 285         src = self.part_href
 286         if self.sub_number is not None:
 287             src += '#sub%d' % self.sub_number
 288         return src
 289
 290     def write_to_xml(self, nav_map, counter=1):
 291         for child in self.children:
 292             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 293             nav_point.set('id', 'NavPoint-%d' % counter)
 294             nav_point.set('playOrder', str(counter))
 295
 296             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 297             text = nav_map.makeelement(NCXNS('text'))
 298             if child.name is not None:
 299                 text.text = re.sub(r'\n', ' ', child.name)
 300             else:
 301                 text.text = child.name
 302             nav_label.append(text)
 303             nav_point.append(nav_label)
 304
 305             content = nav_map.makeelement(NCXNS('content'))
 306             content.set('src', child.href())
 307             nav_point.append(content)
 308             nav_map.append(nav_point)
 309             counter = child.write_to_xml(nav_point, counter + 1)
 310         return counter
 311
 312     def html_part(self, depth=0):
 313         texts = []
 314         for child in self.children:
 315             texts.append(
 316                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 317                 (depth, child.href(), child.name))
 318             texts.append(child.html_part(depth + 1))
 319         return "\n".join(texts)
 320
 321     def html(self):
 322         with open(get_resource('epub/toc.html')) as f:
 323             t = unicode(f.read(), 'utf-8')
 324         return t % self.html_part()
 325
 326
 327 def used_chars(element):
 328     """ Lists characters used in an ETree Element """
 329     chars = set((element.text or '') + (element.tail or ''))
 330     for child in element:
 331         chars = chars.union(used_chars(child))
 332     return chars
 333
 334
 335 def chop(main_text):
 336     """ divide main content of the XML file into chunks """
 337
 338     # prepare a container for each chunk
 339     part_xml = etree.Element('utwor')
 340     etree.SubElement(part_xml, 'master')
 341     main_xml_part = part_xml[0]  # master
 342
 343     last_node_part = False
 344
 345     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 346     is_scene = False
 347     is_act = False
 348     for one_part in main_text:
 349         name = one_part.tag
 350         if name == 'naglowek_scena':
 351             is_scene = True
 352         elif name == 'naglowek_akt':
 353             is_act = True
 354
 355     for one_part in main_text:
 356         name = one_part.tag
 357         if is_act is False and is_scene is True:
 358             if name == 'naglowek_czesc':
 359                 yield part_xml
 360                 last_node_part = True
 361                 main_xml_part[:] = [deepcopy(one_part)]
 362             elif not last_node_part and name == "naglowek_scena":
 363                 yield part_xml
 364                 main_xml_part[:] = [deepcopy(one_part)]
 365             else:
 366                 main_xml_part.append(deepcopy(one_part))
 367                 last_node_part = False
 368         else:
 369             if name == 'naglowek_czesc':
 370                 yield part_xml
 371                 last_node_part = True
 372                 main_xml_part[:] = [deepcopy(one_part)]
 373             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 374                 yield part_xml
 375                 main_xml_part[:] = [deepcopy(one_part)]
 376             else:
 377                 main_xml_part.append(deepcopy(one_part))
 378                 last_node_part = False
 379     yield part_xml
 380
 381
 382 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 383     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 384
 385     toc = TOC()
 386     for element in chunk_xml[0]:
 387         if element.tag == "naglowek_czesc":
 388             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 389         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 390             toc.add(node_name(element), "part%d.html" % chunk_no)
 391         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 392             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 393             element.set('sub', str(subnumber))
 394     if empty:
 395         if not _empty_html_static:
 396             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 397         chars = set()
 398         output_html = _empty_html_static[0]
 399     else:
 400         find_annotations(annotations, chunk_xml, chunk_no)
 401         replace_by_verse(chunk_xml)
 402         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 403         chars = used_chars(html_tree.getroot())
 404         output_html = etree.tostring(
 405             html_tree, pretty_print=True, xml_declaration=True,
 406             encoding="utf-8",
 407             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 408                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 409         )
 410     return output_html, toc, chars
 411
 412
 413 def transform(wldoc, verbose=False, style=None, html_toc=False,
 414               sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
 415     """ produces a EPUB file
 416
 417     sample=n: generate sample e-book (with at least n paragraphs)
 418     cover: a cover.Cover factory or True for default
 419     flags: less-advertising, without-fonts, working-copy
 420     """
 421
 422     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 423         """ processes one input file and proceeds to its children """
 424
 425         replace_characters(wldoc.edoc.getroot())
 426
 427         hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
 428         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 429
 430         # every input file will have a TOC entry,
 431         # pointing to starting chunk
 432         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 433         chars = set()
 434         if first:
 435             # write book title page
 436             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
 437             chars = used_chars(html_tree.getroot())
 438             zip.writestr(
 439                 'OPS/title.html',
 440                 etree.tostring(
 441                     html_tree, pretty_print=True, xml_declaration=True,
 442                     encoding="utf-8",
 443                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 444                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 445                 )
 446             )
 447             # add a title page TOC entry
 448             toc.add(u"Strona tytułowa", "title.html")
 449         elif wldoc.book_info.parts:
 450             # write title page for every parent
 451             if sample is not None and sample <= 0:
 452                 chars = set()
 453                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 454             else:
 455                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 456                 chars = used_chars(html_tree.getroot())
 457                 html_string = etree.tostring(
 458                     html_tree, pretty_print=True, xml_declaration=True,
 459                     encoding="utf-8",
 460                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 461                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 462                 )
 463             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
 464             add_to_manifest(manifest, chunk_counter)
 465             add_to_spine(spine, chunk_counter)
 466             chunk_counter += 1
 467
 468         if len(wldoc.edoc.getroot()) > 1:
 469             # rdf before style master
 470             main_text = wldoc.edoc.getroot()[1]
 471         else:
 472             # rdf in style master
 473             main_text = wldoc.edoc.getroot()[0]
 474             if main_text.tag == RDFNS('RDF'):
 475                 main_text = None
 476
 477         if main_text is not None:
 478             for chunk_xml in chop(main_text):
 479                 empty = False
 480                 if sample is not None:
 481                     if sample <= 0:
 482                         empty = True
 483                     else:
 484                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 485                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 486
 487                 toc.extend(chunk_toc)
 488                 chars = chars.union(chunk_chars)
 489                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
 490                 add_to_manifest(manifest, chunk_counter)
 491                 add_to_spine(spine, chunk_counter)
 492                 chunk_counter += 1
 493
 494         for child in wldoc.parts():
 495             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 496                 child, chunk_counter, first=False, sample=sample)
 497             toc.append(child_toc)
 498             chars = chars.union(chunk_chars)
 499
 500         return toc, chunk_counter, chars, sample
 501
 502     document = deepcopy(wldoc)
 503     del wldoc
 504
 505     if flags:
 506         for flag in flags:
 507             document.edoc.getroot().set(flag, 'yes')
 508
 509     document.clean_ed_note()
 510     document.clean_ed_note('abstrakt')
 511
 512     # add editors info
 513     editors = document.editors()
 514     if editors:
 515         document.edoc.getroot().set('editors', u', '.join(sorted(
 516             editor.readable() for editor in editors)))
 517     if document.book_info.funders:
 518         document.edoc.getroot().set('funders', u', '.join(
 519             document.book_info.funders))
 520     if document.book_info.thanks:
 521         document.edoc.getroot().set('thanks', document.book_info.thanks)
 522
 523     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 524     manifest = opf.find('.//' + OPFNS('manifest'))
 525     guide = opf.find('.//' + OPFNS('guide'))
 526     spine = opf.find('.//' + OPFNS('spine'))
 527
 528     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 529     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 530
 531     functions.reg_mathml_epub(zip)
 532
 533     if os.path.isdir(ilustr_path):
 534         for i, filename in enumerate(os.listdir(ilustr_path)):
 535             file_path = os.path.join(ilustr_path, filename)
 536             zip.write(file_path, os.path.join('OPS', filename))
 537             image_id = 'image%s' % i
 538             manifest.append(etree.fromstring(
 539                 '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
 540
 541     # write static elements
 542     mime = zipfile.ZipInfo()
 543     mime.filename = 'mimetype'
 544     mime.compress_type = zipfile.ZIP_STORED
 545     mime.extra = ''
 546     zip.writestr(mime, 'application/epub+zip')
 547     zip.writestr(
 548         'META-INF/container.xml',
 549         '<?xml version="1.0" ?>'
 550         '<container version="1.0" '
 551         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 552         '<rootfiles><rootfile full-path="OPS/content.opf" '
 553         'media-type="application/oebps-package+xml" />'
 554         '</rootfiles></container>'
 555     )
 556     zip.write(get_resource('res/wl-logo-small.png'),
 557               os.path.join('OPS', 'logo_wolnelektury.png'))
 558     zip.write(get_resource('res/jedenprocent.png'),
 559               os.path.join('OPS', 'jedenprocent.png'))
 560     if not style:
 561         style = get_resource('epub/style.css')
 562     zip.write(style, os.path.join('OPS', 'style.css'))
 563
 564     if cover:
 565         if cover is True:
 566             cover = DefaultEbookCover
 567
 568         cover_file = StringIO()
 569         bound_cover = cover(document.book_info)
 570         bound_cover.save(cover_file)
 571         cover_name = 'cover.%s' % bound_cover.ext()
 572         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 573         del cover_file
 574
 575         cover_tree = etree.parse(get_resource('epub/cover.html'))
 576         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 577         zip.writestr('OPS/cover.html', etree.tostring(
 578             cover_tree, pretty_print=True, xml_declaration=True,
 579             encoding="utf-8",
 580             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 581                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 582         ))
 583
 584         if bound_cover.uses_dc_cover:
 585             if document.book_info.cover_by:
 586                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 587             if document.book_info.cover_source:
 588                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 589
 590         manifest.append(etree.fromstring(
 591             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 592         manifest.append(etree.fromstring(
 593             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 594         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 595         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 596         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 597
 598     annotations = etree.Element('annotations')
 599
 600     toc_file = etree.fromstring(
 601         '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 602         '"-//NISO//DTD ncx 2005-1//EN" '
 603         '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 604         '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 605         'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 606         '</navMap></ncx>'
 607     )
 608     nav_map = toc_file[-1]
 609
 610     if html_toc:
 611         manifest.append(etree.fromstring(
 612             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 613         spine.append(etree.fromstring(
 614             '<itemref idref="html_toc" />'))
 615         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 616
 617     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 618
 619     if len(toc.children) < 2:
 620         toc.add(u"Początek utworu", "part1.html")
 621
 622     # Last modifications in container files and EPUB creation
 623     if len(annotations) > 0:
 624         toc.add("Przypisy", "annotations.html")
 625         manifest.append(etree.fromstring(
 626             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 627         spine.append(etree.fromstring(
 628             '<itemref idref="annotations" />'))
 629         replace_by_verse(annotations)
 630         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 631         chars = chars.union(used_chars(html_tree.getroot()))
 632         zip.writestr('OPS/annotations.html', etree.tostring(
 633             html_tree, pretty_print=True, xml_declaration=True,
 634             encoding="utf-8",
 635             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 636                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 637         ))
 638
 639     toc.add("Wesprzyj Wolne Lektury", "support.html")
 640     manifest.append(etree.fromstring(
 641         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 642     spine.append(etree.fromstring(
 643         '<itemref idref="support" />'))
 644     html_string = open(get_resource('epub/support.html')).read()
 645     chars.update(used_chars(etree.fromstring(html_string)))
 646     zip.writestr('OPS/support.html', html_string)
 647
 648     toc.add("Strona redakcyjna", "last.html")
 649     manifest.append(etree.fromstring(
 650         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 651     spine.append(etree.fromstring(
 652         '<itemref idref="last" />'))
 653     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
 654     chars.update(used_chars(html_tree.getroot()))
 655     zip.writestr('OPS/last.html', etree.tostring(
 656         html_tree, pretty_print=True, xml_declaration=True,
 657         encoding="utf-8",
 658         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 659                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 660     ))
 661
 662     if not flags or 'without-fonts' not in flags:
 663         # strip fonts
 664         tmpdir = mkdtemp('-librarian-epub')
 665         try:
 666             cwd = os.getcwd()
 667         except OSError:
 668             cwd = None
 669
 670         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 671         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 672             optimizer_call = ['perl', 'subset.pl', '--chars',
 673                               ''.join(chars).encode('utf-8'),
 674                               get_resource('fonts/' + fname),
 675                               os.path.join(tmpdir, fname)]
 676             if verbose:
 677                 print "Running font-optimizer"
 678                 subprocess.check_call(optimizer_call)
 679             else:
 680                 dev_null = open(os.devnull, 'w')
 681                 subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
 682             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 683             manifest.append(etree.fromstring(
 684                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 685         rmtree(tmpdir)
 686         if cwd is not None:
 687             os.chdir(cwd)
 688     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 689                  xml_declaration=True, encoding="utf-8"))
 690     title = document.book_info.title
 691     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 692     for st in attributes:
 693         meta = toc_file.makeelement(NCXNS('meta'))
 694         meta.set('name', st)
 695         meta.set('content', '0')
 696         toc_file[0].append(meta)
 697     toc_file[0][0].set('content', str(document.book_info.url))
 698     toc_file[0][1].set('content', str(toc.depth()))
 699     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 700
 701     # write TOC
 702     if html_toc:
 703         toc.add(u"Spis treści", "toc.html", index=1)
 704         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 705     toc.write_to_xml(nav_map)
 706     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 707                  xml_declaration=True, encoding="utf-8"))
 708     zip.close()
 709
 710     return OutputFile.from_filename(output_file.name)