src/librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import print_function, unicode_literals
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from six import BytesIO
  13 from copy import deepcopy
  14 from mimetypes import guess_type
  15
  16 from lxml import etree
  17 import zipfile
  18 from tempfile import mkdtemp, NamedTemporaryFile
  19 from shutil import rmtree
  20
  21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  22 from librarian.cover import make_cover
  23
  24 from librarian import functions, get_resource
  25
  26 from librarian.hyphenator import Hyphenator
  27
# Import-time side effect: run two registration hooks from
# librarian.functions.  Presumably they make helper functions available to
# the XSLT stylesheets applied further down -- TODO confirm in
# librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  30
  31
  32 def squeeze_whitespace(s):
  33     return re.sub(b'\\s+', b' ', s)
  34
  35
  36 def set_hyph_language(source_tree):
  37     def get_short_lng_code(text):
  38         result = ''
  39         text = ''.join(text)
  40         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  41             for line in f.read().decode('latin1').split('\n'):
  42                 list = line.strip().split('|')
  43                 if list[0] == text:
  44                     result = list[2]
  45         if result == '':
  46             return text
  47         else:
  48             return result
  49     bibl_lng = etree.XPath('//dc:language//text()',
  50                            namespaces={'dc': str(DCNS)})(source_tree)
  51     short_lng = get_short_lng_code(bibl_lng[0])
  52     try:
  53         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  54                                        short_lng + '.dic'))
  55     except:
  56         pass
  57
  58
  59 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  60     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  61     for t in texts:
  62         parent = t.getparent()
  63         if hyph is not None:
  64             newt = ''
  65             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  66             for w in wlist:
  67                 newt += hyph.inserted(w, u'\u00AD')
  68         else:
  69             newt = t
  70         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  71         if t.is_text:
  72             parent.text = newt
  73         elif t.is_tail:
  74             parent.tail = newt
  75
  76
  77 def inner_xml(node):
  78     """ returns node's text and children as a string
  79
  80     >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
  81     x<b>y</b>z
  82     """
  83
  84     nt = node.text if node.text is not None else ''
  85     return ''.join(
  86         [nt] + [etree.tostring(child, encoding='unicode') for child in node]
  87     )
  88
  89
  90 def set_inner_xml(node, text):
  91     """ sets node's text and children from a string
  92
  93     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  94     >>> set_inner_xml(e, 'x<b>y</b>z')
  95     >>> print(etree.tostring(e, encoding='unicode'))
  96     <a>x<b>y</b>z</a>
  97     """
  98
  99     p = etree.fromstring('<x>%s</x>' % text)
 100     node.text = p.text
 101     node[:] = p[:]
 102
 103
 104 def node_name(node):
 105     """ Find out a node's name
 106
 107     >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
 108     XYZ
 109     """
 110
 111     tempnode = deepcopy(node)
 112
 113     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 114         for e in tempnode.findall('.//%s' % p):
 115             t = e.tail
 116             e.clear()
 117             e.tail = t
 118     etree.strip_tags(tempnode, '*')
 119     return tempnode.text
 120
 121
 122 def xslt(xml, sheet, **kwargs):
 123     if isinstance(xml, etree._Element):
 124         xml = etree.ElementTree(xml)
 125     with open(sheet) as xsltf:
 126         transform = etree.XSLT(etree.parse(xsltf))
 127         params = dict(
 128             (key, transform.strparam(value))
 129             for key, value in kwargs.items()
 130         )
 131         return transform(xml, **params)
 132
 133
 134 def replace_characters(node):
 135     def replace_chars(text):
 136         if text is None:
 137             return None
 138         return text.replace(u"\ufeff", u"")\
 139                    .replace("---", u"\u2014")\
 140                    .replace("--", u"\u2013")\
 141                    .replace(",,", u"\u201E")\
 142                    .replace('"', u"\u201D")\
 143                    .replace("'", u"\u2019")
 144     if node.tag in ('uwaga', 'extra'):
 145         t = node.tail
 146         node.clear()
 147         node.tail = t
 148     node.text = replace_chars(node.text)
 149     node.tail = replace_chars(node.tail)
 150     for child in node:
 151         replace_characters(child)
 152
 153
 154 def find_annotations(annotations, source, part_no):
 155     for child in source:
 156         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 157             annotation = deepcopy(child)
 158             number = str(len(annotations) + 1)
 159             annotation.set('number', number)
 160             annotation.set('part', str(part_no))
 161             annotation.tail = ''
 162             annotations.append(annotation)
 163             tail = child.tail
 164             child.clear()
 165             child.tail = tail
 166             child.text = number
 167         if child.tag not in ('extra', 'uwaga'):
 168             find_annotations(annotations, child, part_no)
 169
 170
 171 class Stanza(object):
 172     """
 173     Converts / verse endings into verse elements in a stanza.
 174
 175     Slashes may only occur directly in the stanza. Any slashes in subelements
 176     will be ignored, and the subelements will be put inside verse elements.
 177
 178     >>> s = etree.fromstring(
 179     ...         "<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>"
 180     ...     )
 181     >>> Stanza(s).versify()
 182     >>> print(etree.tostring(s, encoding='unicode', pretty_print=True).strip())
 183     <strofa>
 184       <wers_normalny>a <b>c</b><b>c</b></wers_normalny>
 185       <wers_normalny>b<x>x/
 186     y</x>c</wers_normalny>
 187       <wers_normalny>d</wers_normalny>
 188     </strofa>
 189
 190     """
 191     def __init__(self, stanza_elem):
 192         self.stanza = stanza_elem
 193         self.verses = []
 194         self.open_verse = None
 195
 196     def versify(self):
 197         self.push_text(self.stanza.text)
 198         for elem in self.stanza:
 199             self.push_elem(elem)
 200             self.push_text(elem.tail)
 201         tail = self.stanza.tail
 202         self.stanza.clear()
 203         self.stanza.tail = tail
 204         self.stanza.extend(
 205             verse for verse in self.verses
 206             if verse.text or len(verse) > 0
 207         )
 208
 209     def open_normal_verse(self):
 210         self.open_verse = self.stanza.makeelement("wers_normalny")
 211         self.verses.append(self.open_verse)
 212
 213     def get_open_verse(self):
 214         if self.open_verse is None:
 215             self.open_normal_verse()
 216         return self.open_verse
 217
 218     def push_text(self, text):
 219         if not text:
 220             return
 221         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 222             if i:
 223                 self.open_normal_verse()
 224             if not verse_text.strip():
 225                 continue
 226             verse = self.get_open_verse()
 227             if len(verse):
 228                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 229             else:
 230                 verse.text = (verse.text or "") + verse_text
 231
 232     def push_elem(self, elem):
 233         if elem.tag.startswith("wers"):
 234             verse = deepcopy(elem)
 235             verse.tail = None
 236             self.verses.append(verse)
 237             self.open_verse = verse
 238         else:
 239             appended = deepcopy(elem)
 240             appended.tail = None
 241             self.get_open_verse().append(appended)
 242
 243
 244 def replace_by_verse(tree):
 245     """ Find stanzas and create new verses in place of a '/' character """
 246
 247     stanzas = tree.findall('.//' + WLNS('strofa'))
 248     for stanza in stanzas:
 249         Stanza(stanza).versify()
 250
 251
 252 def add_to_manifest(manifest, partno):
 253     """ Adds a node to the manifest section in content.opf file """
 254
 255     partstr = 'part%d' % partno
 256     e = manifest.makeelement(
 257         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 258                                'media-type': 'application/xhtml+xml'}
 259     )
 260     manifest.append(e)
 261
 262
 263 def add_to_spine(spine, partno):
 264     """ Adds a node to the spine section in content.opf file """
 265
 266     e = spine.makeelement(
 267         OPFNS('itemref'),
 268         attrib={'idref': 'part%d' % partno}
 269     )
 270     spine.append(e)
 271
 272
 273 class TOC(object):
 274     def __init__(self, name=None, part_href=None):
 275         self.children = []
 276         self.name = name
 277         self.part_href = part_href
 278         self.sub_number = None
 279
 280     def add(self, name, part_href, level=0, is_part=True, index=None):
 281         assert level == 0 or index is None
 282         if level > 0 and self.children:
 283             return self.children[-1].add(name, part_href, level - 1, is_part)
 284         else:
 285             t = TOC(name)
 286             t.part_href = part_href
 287             if index is not None:
 288                 self.children.insert(index, t)
 289             else:
 290                 self.children.append(t)
 291             if not is_part:
 292                 t.sub_number = len(self.children) + 1
 293                 return t.sub_number
 294
 295     def append(self, toc):
 296         self.children.append(toc)
 297
 298     def extend(self, toc):
 299         self.children.extend(toc.children)
 300
 301     def depth(self):
 302         if self.children:
 303             return max((c.depth() for c in self.children)) + 1
 304         else:
 305             return 0
 306
 307     def href(self):
 308         src = self.part_href
 309         if self.sub_number is not None:
 310             src += '#sub%d' % self.sub_number
 311         return src
 312
 313     def write_to_xml(self, nav_map, counter=1):
 314         for child in self.children:
 315             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 316             nav_point.set('id', 'NavPoint-%d' % counter)
 317             nav_point.set('playOrder', str(counter))
 318
 319             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 320             text = nav_map.makeelement(NCXNS('text'))
 321             if child.name is not None:
 322                 text.text = re.sub(r'\n', ' ', child.name)
 323             else:
 324                 text.text = child.name
 325             nav_label.append(text)
 326             nav_point.append(nav_label)
 327
 328             content = nav_map.makeelement(NCXNS('content'))
 329             content.set('src', child.href())
 330             nav_point.append(content)
 331             nav_map.append(nav_point)
 332             counter = child.write_to_xml(nav_point, counter + 1)
 333         return counter
 334
 335     def html_part(self, depth=0):
 336         texts = []
 337         for child in self.children:
 338             texts.append(
 339                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 340                 (depth, child.href(), child.name))
 341             texts.append(child.html_part(depth + 1))
 342         return "\n".join(texts)
 343
 344     def html(self):
 345         with open(get_resource('epub/toc.html'), 'rb') as f:
 346             t = f.read().decode('utf-8')
 347         return t % self.html_part()
 348
 349
 350 def used_chars(element):
 351     """ Lists characters used in an ETree Element """
 352     chars = set((element.text or '') + (element.tail or ''))
 353     for child in element:
 354         chars = chars.union(used_chars(child))
 355     return chars
 356
 357
 358 def chop(main_text):
 359     """ divide main content of the XML file into chunks """
 360
 361     # prepare a container for each chunk
 362     part_xml = etree.Element('utwor')
 363     etree.SubElement(part_xml, 'master')
 364     main_xml_part = part_xml[0]  # master
 365
 366     last_node_part = False
 367
 368     # The below loop are workaround for a problem with epubs
 369     # in drama ebooks without acts.
 370     is_scene = False
 371     is_act = False
 372     for one_part in main_text:
 373         name = one_part.tag
 374         if name == 'naglowek_scena':
 375             is_scene = True
 376         elif name == 'naglowek_akt':
 377             is_act = True
 378
 379     for one_part in main_text:
 380         name = one_part.tag
 381         if is_act is False and is_scene is True:
 382             if name == 'naglowek_czesc':
 383                 yield part_xml
 384                 last_node_part = True
 385                 main_xml_part[:] = [deepcopy(one_part)]
 386             elif not last_node_part and name == "naglowek_scena":
 387                 yield part_xml
 388                 main_xml_part[:] = [deepcopy(one_part)]
 389             else:
 390                 main_xml_part.append(deepcopy(one_part))
 391                 last_node_part = False
 392         else:
 393             if name == 'naglowek_czesc':
 394                 yield part_xml
 395                 last_node_part = True
 396                 main_xml_part[:] = [deepcopy(one_part)]
 397             elif (not last_node_part
 398                   and name in (
 399                       "naglowek_rozdzial", "naglowek_akt", "srodtytul"
 400                   )):
 401                 yield part_xml
 402                 main_xml_part[:] = [deepcopy(one_part)]
 403             else:
 404                 main_xml_part.append(deepcopy(one_part))
 405                 last_node_part = False
 406     yield part_xml
 407
 408
 409 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
 410                     _empty_html_static=[]):
 411     """
 412     Transforms one chunk, returns a HTML string, a TOC object
 413     and a set of used characters.
 414     """
 415
 416     toc = TOC()
 417     for element in chunk_xml[0]:
 418         if element.tag == "naglowek_czesc":
 419             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 420         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 421             toc.add(node_name(element), "part%d.html" % chunk_no)
 422         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 423             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no,
 424                                 level=1, is_part=False)
 425             element.set('sub', str(subnumber))
 426     if empty:
 427         if not _empty_html_static:
 428             with open(get_resource('epub/emptyChunk.html')) as f:
 429                 _empty_html_static.append(f.read())
 430         chars = set()
 431         output_html = _empty_html_static[0]
 432     else:
 433         find_annotations(annotations, chunk_xml, chunk_no)
 434         replace_by_verse(chunk_xml)
 435         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 436         chars = used_chars(html_tree.getroot())
 437         output_html = etree.tostring(
 438             html_tree, pretty_print=True, xml_declaration=True,
 439             encoding="utf-8",
 440             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 441                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 442         )
 443     return output_html, toc, chars
 444
 445
 446 def transform(wldoc, verbose=False, style=None, html_toc=False,
 447               sample=None, cover=None, flags=None, hyphenate=False,
 448               ilustr_path='', output_type='epub'):
 449     """ produces a EPUB file
 450
 451     sample=n: generate sample e-book (with at least n paragraphs)
 452     cover: a cover.Cover factory or True for default
 453     flags: less-advertising, without-fonts, working-copy
 454     """
 455
 456     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 457         """ processes one input file and proceeds to its children """
 458
 459         replace_characters(wldoc.edoc.getroot())
 460
 461         hyphenator = set_hyph_language(
 462             wldoc.edoc.getroot()
 463         ) if hyphenate else None
 464         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 465
 466         # every input file will have a TOC entry,
 467         # pointing to starting chunk
 468         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 469         chars = set()
 470         if first:
 471             # write book title page
 472             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'),
 473                              outputtype=output_type)
 474             chars = used_chars(html_tree.getroot())
 475             html_string = etree.tostring(
 476                 html_tree, pretty_print=True, xml_declaration=True,
 477                 encoding="utf-8",
 478                 doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 479                         ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 480             )
 481             zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
 482             # add a title page TOC entry
 483             toc.add(u"Strona tytułowa", "title.html")
 484         elif wldoc.book_info.parts:
 485             # write title page for every parent
 486             if sample is not None and sample <= 0:
 487                 chars = set()
 488                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 489             else:
 490                 html_tree = xslt(wldoc.edoc,
 491                                  get_resource('epub/xsltChunkTitle.xsl'))
 492                 chars = used_chars(html_tree.getroot())
 493                 html_string = etree.tostring(
 494                     html_tree, pretty_print=True, xml_declaration=True,
 495                     encoding="utf-8",
 496                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'
 497                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 498                 )
 499             zip.writestr('OPS/part%d.html' % chunk_counter,
 500                          squeeze_whitespace(html_string))
 501             add_to_manifest(manifest, chunk_counter)
 502             add_to_spine(spine, chunk_counter)
 503             chunk_counter += 1
 504
 505         if len(wldoc.edoc.getroot()) > 1:
 506             # rdf before style master
 507             main_text = wldoc.edoc.getroot()[1]
 508         else:
 509             # rdf in style master
 510             main_text = wldoc.edoc.getroot()[0]
 511             if main_text.tag == RDFNS('RDF'):
 512                 main_text = None
 513
 514         if main_text is not None:
 515             for chunk_xml in chop(main_text):
 516                 empty = False
 517                 if sample is not None:
 518                     if sample <= 0:
 519                         empty = True
 520                     else:
 521                         sample -= len(chunk_xml.xpath(
 522                             '//strofa|//akap|//akap_cd|//akap_dialog'
 523                         ))
 524                 chunk_html, chunk_toc, chunk_chars = transform_chunk(
 525                     chunk_xml, chunk_counter, annotations, empty)
 526
 527                 toc.extend(chunk_toc)
 528                 chars = chars.union(chunk_chars)
 529                 zip.writestr('OPS/part%d.html' % chunk_counter,
 530                              squeeze_whitespace(chunk_html))
 531                 add_to_manifest(manifest, chunk_counter)
 532                 add_to_spine(spine, chunk_counter)
 533                 chunk_counter += 1
 534
 535         for child in wldoc.parts():
 536             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 537                 child, chunk_counter, first=False, sample=sample)
 538             toc.append(child_toc)
 539             chars = chars.union(chunk_chars)
 540
 541         return toc, chunk_counter, chars, sample
 542
 543     document = deepcopy(wldoc)
 544     del wldoc
 545
 546     if flags:
 547         for flag in flags:
 548             document.edoc.getroot().set(flag, 'yes')
 549
 550     document.clean_ed_note()
 551     document.clean_ed_note('abstrakt')
 552
 553     # add editors info
 554     editors = document.editors()
 555     if editors:
 556         document.edoc.getroot().set('editors', u', '.join(sorted(
 557             editor.readable() for editor in editors)))
 558     if document.book_info.funders:
 559         document.edoc.getroot().set('funders', u', '.join(
 560             document.book_info.funders))
 561     if document.book_info.thanks:
 562         document.edoc.getroot().set('thanks', document.book_info.thanks)
 563
 564     opf = xslt(document.book_info.to_etree(),
 565                get_resource('epub/xsltContent.xsl'))
 566     manifest = opf.find('.//' + OPFNS('manifest'))
 567     guide = opf.find('.//' + OPFNS('guide'))
 568     spine = opf.find('.//' + OPFNS('spine'))
 569
 570     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
 571                                      delete=False)
 572     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 573
 574     functions.reg_mathml_epub(zip)
 575
 576     if os.path.isdir(ilustr_path):
 577         ilustr_elements = set(ilustr.get('src')
 578                               for ilustr in document.edoc.findall('//ilustr'))
 579         for i, filename in enumerate(os.listdir(ilustr_path)):
 580             if filename not in ilustr_elements:
 581                 continue
 582             file_path = os.path.join(ilustr_path, filename)
 583             zip.write(file_path, os.path.join('OPS', filename))
 584             image_id = 'image%s' % i
 585             manifest.append(etree.fromstring(
 586                 '<item id="%s" href="%s" media-type="%s" />' % (
 587                     image_id, filename, guess_type(file_path)[0])
 588             ))
 589
 590     # write static elements
 591     mime = zipfile.ZipInfo()
 592     mime.filename = 'mimetype'
 593     mime.compress_type = zipfile.ZIP_STORED
 594     mime.extra = b''
 595     zip.writestr(mime, b'application/epub+zip')
 596     zip.writestr(
 597         'META-INF/container.xml',
 598         b'<?xml version="1.0" ?>'
 599         b'<container version="1.0" '
 600         b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 601         b'<rootfiles><rootfile full-path="OPS/content.opf" '
 602         b'media-type="application/oebps-package+xml" />'
 603         b'</rootfiles></container>'
 604     )
 605     zip.write(get_resource('res/wl-logo-small.png'),
 606               os.path.join('OPS', 'logo_wolnelektury.png'))
 607     zip.write(get_resource('res/jedenprocent.png'),
 608               os.path.join('OPS', 'jedenprocent.png'))
 609     if not style:
 610         style = get_resource('epub/style.css')
 611     zip.write(style, os.path.join('OPS', 'style.css'))
 612
 613     if cover:
 614         if cover is True:
 615             cover = make_cover
 616
 617         cover_file = BytesIO()
 618         bound_cover = cover(document.book_info)
 619         bound_cover.save(cover_file)
 620         cover_name = 'cover.%s' % bound_cover.ext()
 621         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 622         del cover_file
 623
 624         cover_tree = etree.parse(get_resource('epub/cover.html'))
 625         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 626         zip.writestr('OPS/cover.html', etree.tostring(
 627             cover_tree, pretty_print=True, xml_declaration=True,
 628             encoding="utf-8",
 629             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 630                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 631         ))
 632
 633         if bound_cover.uses_dc_cover:
 634             if document.book_info.cover_by:
 635                 document.edoc.getroot().set('data-cover-by',
 636                                             document.book_info.cover_by)
 637             if document.book_info.cover_source:
 638                 document.edoc.getroot().set('data-cover-source',
 639                                             document.book_info.cover_source)
 640
 641         manifest.append(etree.fromstring(
 642             '<item id="cover" href="cover.html" '
 643             'media-type="application/xhtml+xml" />'
 644         ))
 645         manifest.append(etree.fromstring(
 646             '<item id="cover-image" href="%s" media-type="%s" />' % (
 647                 cover_name, bound_cover.mime_type()
 648             )
 649         ))
 650         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 651         opf.getroot()[0].append(etree.fromstring(
 652             '<meta name="cover" content="cover-image"/>'
 653         ))
 654         guide.append(etree.fromstring(
 655             '<reference href="cover.html" type="cover" title="Okładka"/>'
 656         ))
 657
 658     annotations = etree.Element('annotations')
 659
 660     toc_file = etree.fromstring(
 661         b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 662         b'"-//NISO//DTD ncx 2005-1//EN" '
 663         b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 664         b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 665         b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 666         b'</navMap></ncx>'
 667     )
 668     nav_map = toc_file[-1]
 669
 670     if html_toc:
 671         manifest.append(etree.fromstring(
 672             '<item id="html_toc" href="toc.html" '
 673             'media-type="application/xhtml+xml" />'
 674         ))
 675         spine.append(etree.fromstring(
 676             '<itemref idref="html_toc" />'))
 677         guide.append(etree.fromstring(
 678             '<reference href="toc.html" type="toc" title="Spis treści"/>'
 679         ))
 680
 681     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 682
 683     if len(toc.children) < 2:
 684         toc.add(u"Początek utworu", "part1.html")
 685
 686     # Last modifications in container files and EPUB creation
 687     if len(annotations) > 0:
 688         toc.add("Przypisy", "annotations.html")
 689         manifest.append(etree.fromstring(
 690             '<item id="annotations" href="annotations.html" '
 691             'media-type="application/xhtml+xml" />'
 692         ))
 693         spine.append(etree.fromstring(
 694             '<itemref idref="annotations" />'))
 695         replace_by_verse(annotations)
 696         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 697         chars = chars.union(used_chars(html_tree.getroot()))
 698         zip.writestr('OPS/annotations.html', etree.tostring(
 699             html_tree, pretty_print=True, xml_declaration=True,
 700             encoding="utf-8",
 701             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 702                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 703         ))
 704
 705     toc.add("Wesprzyj Wolne Lektury", "support.html")
 706     manifest.append(etree.fromstring(
 707         '<item id="support" href="support.html" '
 708         'media-type="application/xhtml+xml" />'
 709     ))
 710     spine.append(etree.fromstring(
 711         '<itemref idref="support" />'))
 712     html_string = open(get_resource('epub/support.html'), 'rb').read()
 713     chars.update(used_chars(etree.fromstring(html_string)))
 714     zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
 715
 716     toc.add("Strona redakcyjna", "last.html")
 717     manifest.append(etree.fromstring(
 718         '<item id="last" href="last.html" '
 719         'media-type="application/xhtml+xml" />'
 720     ))
 721     spine.append(etree.fromstring(
 722         '<itemref idref="last" />'))
 723     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
 724                      outputtype=output_type)
 725     chars.update(used_chars(html_tree.getroot()))
 726     zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
 727         html_tree, pretty_print=True, xml_declaration=True,
 728         encoding="utf-8",
 729         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 730                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 731     )))
 732
 733     if not flags or 'without-fonts' not in flags:
 734         # strip fonts
 735         tmpdir = mkdtemp('-librarian-epub')
 736         try:
 737             cwd = os.getcwd()
 738         except OSError:
 739             cwd = None
 740
 741         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)),
 742                               'font-optimizer'))
 743         for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
 744                       'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
 745             optimizer_call = ['perl', 'subset.pl', '--chars',
 746                               ''.join(chars).encode('utf-8'),
 747                               get_resource('fonts/' + fname),
 748                               os.path.join(tmpdir, fname)]
 749             env = {"PERL_USE_UNSAFE_INC": "1"}
 750             if verbose:
 751                 print("Running font-optimizer")
 752                 subprocess.check_call(optimizer_call, env=env)
 753             else:
 754                 dev_null = open(os.devnull, 'w')
 755                 subprocess.check_call(optimizer_call, stdout=dev_null,
 756                                       stderr=dev_null, env=env)
 757             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 758             manifest.append(etree.fromstring(
 759                 '<item id="%s" href="%s" '
 760                 'media-type="application/x-font-truetype" />'
 761                 % (fname, fname)
 762             ))
 763         rmtree(tmpdir)
 764         if cwd is not None:
 765             os.chdir(cwd)
 766     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 767                  xml_declaration=True, encoding="utf-8"))
 768     title = document.book_info.title
 769     attributes = ("dtb:uid", "dtb:depth", "dtb:totalPageCount",
 770                   "dtb:maxPageNumber")
 771     for st in attributes:
 772         meta = toc_file.makeelement(NCXNS('meta'))
 773         meta.set('name', st)
 774         meta.set('content', '0')
 775         toc_file[0].append(meta)
 776     toc_file[0][0].set('content', str(document.book_info.url))
 777     toc_file[0][1].set('content', str(toc.depth()))
 778     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 779
 780     # write TOC
 781     if html_toc:
 782         toc.add(u"Spis treści", "toc.html", index=1)
 783         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 784     toc.write_to_xml(nav_map)
 785     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 786                  xml_declaration=True, encoding="utf-8"))
 787     zip.close()
 788
 789     return OutputFile.from_filename(output_file.name)