librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import print_function, unicode_literals
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from six import BytesIO
  13 from copy import deepcopy
  14 from mimetypes import guess_type
  15
  16 from lxml import etree
  17 import zipfile
  18 from tempfile import mkdtemp, NamedTemporaryFile
  19 from shutil import rmtree
  20
  21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  22 from librarian.cover import make_cover
  23
  24 from librarian import functions, get_resource
  25
  26 from librarian.hyphenator import Hyphenator
  27
# Import-time side effect: two registration hooks from librarian.functions
# are invoked as soon as this module is loaded.
# NOTE(review): presumably these register the person-name and 3-to-2 letter
# language-code extension functions used by the XSLT sheets -- confirm in
# librarian/functions.py.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  30
  31
  32 def squeeze_whitespace(s):
  33     return re.sub(b'\\s+', b' ', s)
  34
  35
  36 def set_hyph_language(source_tree):
  37     def get_short_lng_code(text):
  38         result = ''
  39         text = ''.join(text)
  40         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  41             for line in f.read().decode('latin1').split('\n'):
  42                 list = line.strip().split('|')
  43                 if list[0] == text:
  44                     result = list[2]
  45         if result == '':
  46             return text
  47         else:
  48             return result
  49     bibl_lng = etree.XPath('//dc:language//text()',
  50                            namespaces={'dc': str(DCNS)})(source_tree)
  51     short_lng = get_short_lng_code(bibl_lng[0])
  52     try:
  53         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  54                                        short_lng + '.dic'))
  55     except:
  56         pass
  57
  58
  59 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  60     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  61     for t in texts:
  62         parent = t.getparent()
  63         if hyph is not None:
  64             newt = ''
  65             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  66             for w in wlist:
  67                 newt += hyph.inserted(w, u'\u00AD')
  68         else:
  69             newt = t
  70         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  71         if t.is_text:
  72             parent.text = newt
  73         elif t.is_tail:
  74             parent.tail = newt
  75
  76
  77 def inner_xml(node):
  78     """ returns node's text and children as a string
  79
  80     >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
  81     x<b>y</b>z
  82     """
  83
  84     nt = node.text if node.text is not None else ''
  85     return ''.join([nt] + [etree.tostring(child, encoding='unicode') for child in node])
  86
  87
  88 def set_inner_xml(node, text):
  89     """ sets node's text and children from a string
  90
  91     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  92     >>> set_inner_xml(e, 'x<b>y</b>z')
  93     >>> print(etree.tostring(e, encoding='unicode'))
  94     <a>x<b>y</b>z</a>
  95     """
  96
  97     p = etree.fromstring('<x>%s</x>' % text)
  98     node.text = p.text
  99     node[:] = p[:]
 100
 101
 102 def node_name(node):
 103     """ Find out a node's name
 104
 105     >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
 106     XYZ
 107     """
 108
 109     tempnode = deepcopy(node)
 110
 111     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 112         for e in tempnode.findall('.//%s' % p):
 113             t = e.tail
 114             e.clear()
 115             e.tail = t
 116     etree.strip_tags(tempnode, '*')
 117     return tempnode.text
 118
 119
 120 def xslt(xml, sheet, **kwargs):
 121     if isinstance(xml, etree._Element):
 122         xml = etree.ElementTree(xml)
 123     with open(sheet) as xsltf:
 124         transform = etree.XSLT(etree.parse(xsltf))
 125         params = dict((key, transform.strparam(value)) for key, value in kwargs.items())
 126         return transform(xml, **params)
 127
 128
 129 def replace_characters(node):
 130     def replace_chars(text):
 131         if text is None:
 132             return None
 133         return text.replace(u"\ufeff", u"")\
 134                    .replace("---", u"\u2014")\
 135                    .replace("--", u"\u2013")\
 136                    .replace(",,", u"\u201E")\
 137                    .replace('"', u"\u201D")\
 138                    .replace("'", u"\u2019")
 139     if node.tag in ('uwaga', 'extra'):
 140         t = node.tail
 141         node.clear()
 142         node.tail = t
 143     node.text = replace_chars(node.text)
 144     node.tail = replace_chars(node.tail)
 145     for child in node:
 146         replace_characters(child)
 147
 148
 149 def find_annotations(annotations, source, part_no):
 150     for child in source:
 151         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 152             annotation = deepcopy(child)
 153             number = str(len(annotations) + 1)
 154             annotation.set('number', number)
 155             annotation.set('part', str(part_no))
 156             annotation.tail = ''
 157             annotations.append(annotation)
 158             tail = child.tail
 159             child.clear()
 160             child.tail = tail
 161             child.text = number
 162         if child.tag not in ('extra', 'uwaga'):
 163             find_annotations(annotations, child, part_no)
 164
 165
 166 class Stanza(object):
 167     """
 168     Converts / verse endings into verse elements in a stanza.
 169
 170     Slashes may only occur directly in the stanza. Any slashes in subelements
 171     will be ignored, and the subelements will be put inside verse elements.
 172
 173     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 174     >>> Stanza(s).versify()
 175     >>> print(etree.tostring(s, encoding='unicode'))
 176     <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
 177     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 178
 179     """
 180     def __init__(self, stanza_elem):
 181         self.stanza = stanza_elem
 182         self.verses = []
 183         self.open_verse = None
 184
 185     def versify(self):
 186         self.push_text(self.stanza.text)
 187         for elem in self.stanza:
 188             self.push_elem(elem)
 189             self.push_text(elem.tail)
 190         tail = self.stanza.tail
 191         self.stanza.clear()
 192         self.stanza.tail = tail
 193         self.stanza.extend(verse for verse in self.verses if verse.text or len(verse) > 0)
 194
 195     def open_normal_verse(self):
 196         self.open_verse = self.stanza.makeelement("wers_normalny")
 197         self.verses.append(self.open_verse)
 198
 199     def get_open_verse(self):
 200         if self.open_verse is None:
 201             self.open_normal_verse()
 202         return self.open_verse
 203
 204     def push_text(self, text):
 205         if not text:
 206             return
 207         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 208             if i:
 209                 self.open_normal_verse()
 210             if not verse_text.strip():
 211                 continue
 212             verse = self.get_open_verse()
 213             if len(verse):
 214                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 215             else:
 216                 verse.text = (verse.text or "") + verse_text
 217
 218     def push_elem(self, elem):
 219         if elem.tag.startswith("wers"):
 220             verse = deepcopy(elem)
 221             verse.tail = None
 222             self.verses.append(verse)
 223             self.open_verse = verse
 224         else:
 225             appended = deepcopy(elem)
 226             appended.tail = None
 227             self.get_open_verse().append(appended)
 228
 229
 230 def replace_by_verse(tree):
 231     """ Find stanzas and create new verses in place of a '/' character """
 232
 233     stanzas = tree.findall('.//' + WLNS('strofa'))
 234     for stanza in stanzas:
 235         Stanza(stanza).versify()
 236
 237
 238 def add_to_manifest(manifest, partno):
 239     """ Adds a node to the manifest section in content.opf file """
 240
 241     partstr = 'part%d' % partno
 242     e = manifest.makeelement(
 243         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 244                                'media-type': 'application/xhtml+xml'}
 245     )
 246     manifest.append(e)
 247
 248
 249 def add_to_spine(spine, partno):
 250     """ Adds a node to the spine section in content.opf file """
 251
 252     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 253     spine.append(e)
 254
 255
 256 class TOC(object):
 257     def __init__(self, name=None, part_href=None):
 258         self.children = []
 259         self.name = name
 260         self.part_href = part_href
 261         self.sub_number = None
 262
 263     def add(self, name, part_href, level=0, is_part=True, index=None):
 264         assert level == 0 or index is None
 265         if level > 0 and self.children:
 266             return self.children[-1].add(name, part_href, level - 1, is_part)
 267         else:
 268             t = TOC(name)
 269             t.part_href = part_href
 270             if index is not None:
 271                 self.children.insert(index, t)
 272             else:
 273                 self.children.append(t)
 274             if not is_part:
 275                 t.sub_number = len(self.children) + 1
 276                 return t.sub_number
 277
 278     def append(self, toc):
 279         self.children.append(toc)
 280
 281     def extend(self, toc):
 282         self.children.extend(toc.children)
 283
 284     def depth(self):
 285         if self.children:
 286             return max((c.depth() for c in self.children)) + 1
 287         else:
 288             return 0
 289
 290     def href(self):
 291         src = self.part_href
 292         if self.sub_number is not None:
 293             src += '#sub%d' % self.sub_number
 294         return src
 295
 296     def write_to_xml(self, nav_map, counter=1):
 297         for child in self.children:
 298             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 299             nav_point.set('id', 'NavPoint-%d' % counter)
 300             nav_point.set('playOrder', str(counter))
 301
 302             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 303             text = nav_map.makeelement(NCXNS('text'))
 304             if child.name is not None:
 305                 text.text = re.sub(r'\n', ' ', child.name)
 306             else:
 307                 text.text = child.name
 308             nav_label.append(text)
 309             nav_point.append(nav_label)
 310
 311             content = nav_map.makeelement(NCXNS('content'))
 312             content.set('src', child.href())
 313             nav_point.append(content)
 314             nav_map.append(nav_point)
 315             counter = child.write_to_xml(nav_point, counter + 1)
 316         return counter
 317
 318     def html_part(self, depth=0):
 319         texts = []
 320         for child in self.children:
 321             texts.append(
 322                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 323                 (depth, child.href(), child.name))
 324             texts.append(child.html_part(depth + 1))
 325         return "\n".join(texts)
 326
 327     def html(self):
 328         with open(get_resource('epub/toc.html'), 'rb') as f:
 329             t = f.read().decode('utf-8')
 330         return t % self.html_part()
 331
 332
 333 def used_chars(element):
 334     """ Lists characters used in an ETree Element """
 335     chars = set((element.text or '') + (element.tail or ''))
 336     for child in element:
 337         chars = chars.union(used_chars(child))
 338     return chars
 339
 340
 341 def chop(main_text):
 342     """ divide main content of the XML file into chunks """
 343
 344     # prepare a container for each chunk
 345     part_xml = etree.Element('utwor')
 346     etree.SubElement(part_xml, 'master')
 347     main_xml_part = part_xml[0]  # master
 348
 349     last_node_part = False
 350
 351     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 352     is_scene = False
 353     is_act = False
 354     for one_part in main_text:
 355         name = one_part.tag
 356         if name == 'naglowek_scena':
 357             is_scene = True
 358         elif name == 'naglowek_akt':
 359             is_act = True
 360
 361     for one_part in main_text:
 362         name = one_part.tag
 363         if is_act is False and is_scene is True:
 364             if name == 'naglowek_czesc':
 365                 yield part_xml
 366                 last_node_part = True
 367                 main_xml_part[:] = [deepcopy(one_part)]
 368             elif not last_node_part and name == "naglowek_scena":
 369                 yield part_xml
 370                 main_xml_part[:] = [deepcopy(one_part)]
 371             else:
 372                 main_xml_part.append(deepcopy(one_part))
 373                 last_node_part = False
 374         else:
 375             if name == 'naglowek_czesc':
 376                 yield part_xml
 377                 last_node_part = True
 378                 main_xml_part[:] = [deepcopy(one_part)]
 379             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 380                 yield part_xml
 381                 main_xml_part[:] = [deepcopy(one_part)]
 382             else:
 383                 main_xml_part.append(deepcopy(one_part))
 384                 last_node_part = False
 385     yield part_xml
 386
 387
 388 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 389     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 390
 391     toc = TOC()
 392     for element in chunk_xml[0]:
 393         if element.tag == "naglowek_czesc":
 394             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 395         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 396             toc.add(node_name(element), "part%d.html" % chunk_no)
 397         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 398             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 399             element.set('sub', str(subnumber))
 400     if empty:
 401         if not _empty_html_static:
 402             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 403         chars = set()
 404         output_html = _empty_html_static[0]
 405     else:
 406         find_annotations(annotations, chunk_xml, chunk_no)
 407         replace_by_verse(chunk_xml)
 408         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 409         chars = used_chars(html_tree.getroot())
 410         output_html = etree.tostring(
 411             html_tree, pretty_print=True, xml_declaration=True,
 412             encoding="utf-8",
 413             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 414                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 415         )
 416     return output_html, toc, chars
 417
 418
 419 def transform(wldoc, verbose=False, style=None, html_toc=False,
 420               sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
 421     """ produces a EPUB file
 422
 423     sample=n: generate sample e-book (with at least n paragraphs)
 424     cover: a cover.Cover factory or True for default
 425     flags: less-advertising, without-fonts, working-copy
 426     """
 427
 428     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 429         """ processes one input file and proceeds to its children """
 430
 431         replace_characters(wldoc.edoc.getroot())
 432
 433         hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
 434         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 435
 436         # every input file will have a TOC entry,
 437         # pointing to starting chunk
 438         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 439         chars = set()
 440         if first:
 441             # write book title page
 442             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
 443             chars = used_chars(html_tree.getroot())
 444             html_string = etree.tostring(
 445                 html_tree, pretty_print=True, xml_declaration=True,
 446                 encoding="utf-8",
 447                 doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 448                         ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 449             )
 450             zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
 451             # add a title page TOC entry
 452             toc.add(u"Strona tytułowa", "title.html")
 453         elif wldoc.book_info.parts:
 454             # write title page for every parent
 455             if sample is not None and sample <= 0:
 456                 chars = set()
 457                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 458             else:
 459                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 460                 chars = used_chars(html_tree.getroot())
 461                 html_string = etree.tostring(
 462                     html_tree, pretty_print=True, xml_declaration=True,
 463                     encoding="utf-8",
 464                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 465                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 466                 )
 467             zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
 468             add_to_manifest(manifest, chunk_counter)
 469             add_to_spine(spine, chunk_counter)
 470             chunk_counter += 1
 471
 472         if len(wldoc.edoc.getroot()) > 1:
 473             # rdf before style master
 474             main_text = wldoc.edoc.getroot()[1]
 475         else:
 476             # rdf in style master
 477             main_text = wldoc.edoc.getroot()[0]
 478             if main_text.tag == RDFNS('RDF'):
 479                 main_text = None
 480
 481         if main_text is not None:
 482             for chunk_xml in chop(main_text):
 483                 empty = False
 484                 if sample is not None:
 485                     if sample <= 0:
 486                         empty = True
 487                     else:
 488                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 489                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 490
 491                 toc.extend(chunk_toc)
 492                 chars = chars.union(chunk_chars)
 493                 zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
 494                 add_to_manifest(manifest, chunk_counter)
 495                 add_to_spine(spine, chunk_counter)
 496                 chunk_counter += 1
 497
 498         for child in wldoc.parts():
 499             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 500                 child, chunk_counter, first=False, sample=sample)
 501             toc.append(child_toc)
 502             chars = chars.union(chunk_chars)
 503
 504         return toc, chunk_counter, chars, sample
 505
 506     document = deepcopy(wldoc)
 507     del wldoc
 508
 509     if flags:
 510         for flag in flags:
 511             document.edoc.getroot().set(flag, 'yes')
 512
 513     document.clean_ed_note()
 514     document.clean_ed_note('abstrakt')
 515
 516     # add editors info
 517     editors = document.editors()
 518     if editors:
 519         document.edoc.getroot().set('editors', u', '.join(sorted(
 520             editor.readable() for editor in editors)))
 521     if document.book_info.funders:
 522         document.edoc.getroot().set('funders', u', '.join(
 523             document.book_info.funders))
 524     if document.book_info.thanks:
 525         document.edoc.getroot().set('thanks', document.book_info.thanks)
 526
 527     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 528     manifest = opf.find('.//' + OPFNS('manifest'))
 529     guide = opf.find('.//' + OPFNS('guide'))
 530     spine = opf.find('.//' + OPFNS('spine'))
 531
 532     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 533     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 534
 535     functions.reg_mathml_epub(zip)
 536
 537     if os.path.isdir(ilustr_path):
 538         ilustr_elements = set(ilustr.get('src') for ilustr in document.edoc.findall('//ilustr'))
 539         for i, filename in enumerate(os.listdir(ilustr_path)):
 540             if filename not in ilustr_elements:
 541                 continue
 542             file_path = os.path.join(ilustr_path, filename)
 543             zip.write(file_path, os.path.join('OPS', filename))
 544             image_id = 'image%s' % i
 545             manifest.append(etree.fromstring(
 546                 '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
 547
 548     # write static elements
 549     mime = zipfile.ZipInfo()
 550     mime.filename = 'mimetype'
 551     mime.compress_type = zipfile.ZIP_STORED
 552     mime.extra = b''
 553     zip.writestr(mime, b'application/epub+zip')
 554     zip.writestr(
 555         'META-INF/container.xml',
 556         b'<?xml version="1.0" ?>'
 557         b'<container version="1.0" '
 558         b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 559         b'<rootfiles><rootfile full-path="OPS/content.opf" '
 560         b'media-type="application/oebps-package+xml" />'
 561         b'</rootfiles></container>'
 562     )
 563     zip.write(get_resource('res/wl-logo-small.png'),
 564               os.path.join('OPS', 'logo_wolnelektury.png'))
 565     zip.write(get_resource('res/jedenprocent.png'),
 566               os.path.join('OPS', 'jedenprocent.png'))
 567     if not style:
 568         style = get_resource('epub/style.css')
 569     zip.write(style, os.path.join('OPS', 'style.css'))
 570
 571     if cover:
 572         if cover is True:
 573             cover = make_cover
 574
 575         cover_file = BytesIO()
 576         bound_cover = cover(document.book_info)
 577         bound_cover.save(cover_file)
 578         cover_name = 'cover.%s' % bound_cover.ext()
 579         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 580         del cover_file
 581
 582         cover_tree = etree.parse(get_resource('epub/cover.html'))
 583         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 584         zip.writestr('OPS/cover.html', etree.tostring(
 585             cover_tree, pretty_print=True, xml_declaration=True,
 586             encoding="utf-8",
 587             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 588                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 589         ))
 590
 591         if bound_cover.uses_dc_cover:
 592             if document.book_info.cover_by:
 593                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 594             if document.book_info.cover_source:
 595                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 596
 597         manifest.append(etree.fromstring(
 598             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 599         manifest.append(etree.fromstring(
 600             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 601         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 602         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 603         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 604
 605     annotations = etree.Element('annotations')
 606
 607     toc_file = etree.fromstring(
 608         b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 609         b'"-//NISO//DTD ncx 2005-1//EN" '
 610         b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 611         b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 612         b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 613         b'</navMap></ncx>'
 614     )
 615     nav_map = toc_file[-1]
 616
 617     if html_toc:
 618         manifest.append(etree.fromstring(
 619             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 620         spine.append(etree.fromstring(
 621             '<itemref idref="html_toc" />'))
 622         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 623
 624     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 625
 626     if len(toc.children) < 2:
 627         toc.add(u"Początek utworu", "part1.html")
 628
 629     # Last modifications in container files and EPUB creation
 630     if len(annotations) > 0:
 631         toc.add("Przypisy", "annotations.html")
 632         manifest.append(etree.fromstring(
 633             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 634         spine.append(etree.fromstring(
 635             '<itemref idref="annotations" />'))
 636         replace_by_verse(annotations)
 637         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 638         chars = chars.union(used_chars(html_tree.getroot()))
 639         zip.writestr('OPS/annotations.html', etree.tostring(
 640             html_tree, pretty_print=True, xml_declaration=True,
 641             encoding="utf-8",
 642             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 643                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 644         ))
 645
 646     toc.add("Wesprzyj Wolne Lektury", "support.html")
 647     manifest.append(etree.fromstring(
 648         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 649     spine.append(etree.fromstring(
 650         '<itemref idref="support" />'))
 651     html_string = open(get_resource('epub/support.html'), 'rb').read()
 652     chars.update(used_chars(etree.fromstring(html_string)))
 653     zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
 654
 655     toc.add("Strona redakcyjna", "last.html")
 656     manifest.append(etree.fromstring(
 657         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 658     spine.append(etree.fromstring(
 659         '<itemref idref="last" />'))
 660     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
 661     chars.update(used_chars(html_tree.getroot()))
 662     zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
 663         html_tree, pretty_print=True, xml_declaration=True,
 664         encoding="utf-8",
 665         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 666                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 667     )))
 668
 669     if not flags or 'without-fonts' not in flags:
 670         # strip fonts
 671         tmpdir = mkdtemp('-librarian-epub')
 672         try:
 673             cwd = os.getcwd()
 674         except OSError:
 675             cwd = None
 676
 677         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 678         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 679             optimizer_call = ['perl', 'subset.pl', '--chars',
 680                               ''.join(chars).encode('utf-8'),
 681                               get_resource('fonts/' + fname),
 682                               os.path.join(tmpdir, fname)]
 683             env = {"PERL_USE_UNSAFE_INC": "1"}
 684             if verbose:
 685                 print("Running font-optimizer")
 686                 subprocess.check_call(optimizer_call, env=env)
 687             else:
 688                 dev_null = open(os.devnull, 'w')
 689                 subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
 690             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 691             manifest.append(etree.fromstring(
 692                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 693         rmtree(tmpdir)
 694         if cwd is not None:
 695             os.chdir(cwd)
 696     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 697                  xml_declaration=True, encoding="utf-8"))
 698     title = document.book_info.title
 699     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 700     for st in attributes:
 701         meta = toc_file.makeelement(NCXNS('meta'))
 702         meta.set('name', st)
 703         meta.set('content', '0')
 704         toc_file[0].append(meta)
 705     toc_file[0][0].set('content', str(document.book_info.url))
 706     toc_file[0][1].set('content', str(toc.depth()))
 707     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 708
 709     # write TOC
 710     if html_toc:
 711         toc.add(u"Spis treści", "toc.html", index=1)
 712         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 713     toc.write_to_xml(nav_map)
 714     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 715                  xml_declaration=True, encoding="utf-8"))
 716     zip.close()
 717
 718     return OutputFile.from_filename(output_file.name)