# -*- coding: utf-8 -*-
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from __future__ import with_statement

import os
import os.path
import re
import subprocess
import zipfile
from StringIO import StringIO
from copy import deepcopy
from mimetypes import guess_type
from tempfile import mkdtemp, NamedTemporaryFile
from shutil import rmtree

from lxml import etree

from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
from librarian.cover import DefaultEbookCover
from librarian import functions, get_resource
from librarian.hyphenator import Hyphenator

functions.reg_person_name()
functions.reg_lang_code_3to2()

def set_hyph_language(source_tree):
    def get_short_lng_code(text):
        # map an ISO 639-2 (three-letter) code to its two-letter equivalent
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                list = line.strip().split('|')
                if list[0] == text:
                    return list[2]
        return text

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except IOError:
        # no dictionary for this language: hyphenation is silently skipped
        return None

def hyphenate_and_fix_conjunctions(source_tree, hyph):
    texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
    for t in texts:
        parent = t.getparent()
        if hyph is not None:
            newt = ''
            # insert soft hyphens (U+00AD) into every word
            wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
            for w in wlist:
                newt += hyph.inserted(w, u'\u00AD')
        else:
            newt = t
        # tie single-letter conjunctions to the next word with a no-break space
        newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
        if t.is_text:
            parent.text = newt
        elif t.is_tail:
            parent.tail = newt

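# A minimal usage sketch for the two helpers above, as transform_file() does
# further down: build a Hyphenator from the book's dc:language, then
# soft-hyphenate the text tree (`doc` here is a hypothetical parsed WL document).
#
#     hyph = set_hyph_language(doc.edoc.getroot())
#     hyphenate_and_fix_conjunctions(doc.edoc.getroot(), hyph)
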
def inner_xml(node):
    """ returns node's text and children as a string

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    nt = node.text if node.text is not None else ''
    return ''.join([nt] + [etree.tostring(child) for child in node])

def set_inner_xml(node, text):
    """ sets node's text and children from a string

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    p = etree.fromstring('<x>%s</x>' % text)
    node.text = p.text
    node[:] = p[:]

def node_name(node):
    """ Find out a node's name

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    tempnode = deepcopy(node)

    # drop footnotes and theme markers, keeping only their tails
    for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for e in tempnode.findall('.//%s' % p):
            t = e.tail
            e.clear()
            e.tail = t
    etree.strip_tags(tempnode, '*')
    return tempnode.text

def xslt(xml, sheet):
    if isinstance(xml, etree._Element):
        xml = etree.ElementTree(xml)
    with open(sheet) as xsltf:
        return xml.xslt(etree.parse(xsltf))

def replace_characters(node):
    def replace_chars(text):
        if text is None:
            return None
        return text.replace(u"\ufeff", u"")\
                   .replace("---", u"\u2014")\
                   .replace("--", u"\u2013")\
                   .replace(",,", u"\u201E")\
                   .replace('"', u"\u201D")\
                   .replace("'", u"\u2019")

    if node.tag in ('uwaga', 'extra'):
        t = node.tail
        node.clear()
        node.tail = t
    else:
        node.text = replace_chars(node.text)
        node.tail = replace_chars(node.tail)
        for child in node:
            replace_characters(child)

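# A small illustrative sketch of the effect (the element below is made up):
#
#     elem = etree.fromstring(u'<akap>,,Rety" --- czy to aby -- tak?</akap>')
#     replace_characters(elem)
#     # elem.text now uses U+201E/U+201D quotes, an em dash and an en dash
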
def find_annotations(annotations, source, part_no):
    for child in source:
        if child.tag in ('pe', 'pa', 'pt', 'pr'):
            annotation = deepcopy(child)
            number = str(len(annotations) + 1)
            annotation.set('number', number)
            annotation.set('part', str(part_no))
            annotation.tail = ''
            annotations.append(annotation)
            # leave only the footnote's number in the main text
            tail = child.tail
            child.clear()
            child.tail = tail
            child.text = number
        if child.tag not in ('extra', 'uwaga'):
            find_annotations(annotations, child, part_no)

class Stanza(object):
    """
    Converts / verse endings into verse elements in a stanza.

    Slashes may only occur directly in the stanza. Any slashes in subelements
    will be ignored, and the subelements will be put inside verse elements.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        self.verses = []
        self.open_verse = None

    def versify(self):
        self.push_text(self.stanza.text)
        for elem in self.stanza:
            self.push_elem(elem)
            self.push_text(elem.tail)
        tail = self.stanza.tail
        self.stanza.clear()
        self.stanza.tail = tail
        self.stanza.extend(self.verses)

    def open_normal_verse(self):
        self.open_verse = self.stanza.makeelement("wers_normalny")
        self.verses.append(self.open_verse)

    def get_open_verse(self):
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        if not text:
            return
        for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
            if i:
                self.open_normal_verse()
            if not verse_text.strip():
                continue
            verse = self.get_open_verse()
            if len(verse):
                verse[-1].tail = (verse[-1].tail or "") + verse_text
            else:
                verse.text = (verse.text or "") + verse_text

    def push_elem(self, elem):
        if elem.tag.startswith("wers"):
            verse = deepcopy(elem)
            verse.tail = None
            self.verses.append(verse)
            self.open_verse = verse
        else:
            appended = deepcopy(elem)
            appended.tail = None
            self.get_open_verse().append(appended)

def replace_by_verse(tree):
    """ Find stanzas and create new verses in place of a '/' character """

    stanzas = tree.findall('.//' + WLNS('strofa'))
    for stanza in stanzas:
        Stanza(stanza).versify()

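# A minimal sketch, assuming WLNS resolves to the empty namespace used by
# WL documents (so plain tag names match):
#
#     tree = etree.fromstring('<utwor><strofa>Raz/\nDwa</strofa></utwor>')
#     replace_by_verse(tree)
#     # the <strofa> now holds two <wers_normalny> children
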
def add_to_manifest(manifest, partno):
    """ Adds a node to the manifest section in content.opf file """

    partstr = 'part%d' % partno
    e = manifest.makeelement(
        OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
                               'media-type': 'application/xhtml+xml'}
    )
    manifest.append(e)


def add_to_spine(spine, partno):
    """ Adds a node to the spine section in content.opf file """

    e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
    spine.append(e)

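# For partno=3 the two helpers above add entries roughly equivalent to these
# content.opf fragments (namespaces omitted for brevity):
#
#     <item id="part3" href="part3.html" media-type="application/xhtml+xml" />
#     <itemref idref="part3" />
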
class TOC(object):
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        self.children.append(toc)

    def extend(self, toc):
        self.children.extend(toc.children)

    def depth(self):
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        texts = []
        for child in self.children:
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), child.name))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()

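# A small usage sketch; the hrefs are illustrative:
#
#     toc = TOC()
#     toc.add(u"Rozdział I", "part1.html")
#     toc.add(u"Scena 1", "part1.html", level=1, is_part=False)
#     # toc.depth() == 2 and the sub-entry's href() ends with '#sub2'
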
def used_chars(element):
    """ Lists characters used in an ETree Element """
    chars = set((element.text or '') + (element.tail or ''))
    for child in element:
        chars = chars.union(used_chars(child))
    return chars

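# For example:
#
#     used_chars(etree.fromstring('<a>ab<b>c</b></a>'))  # -> {'a', 'b', 'c'}
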
def chop(main_text):
    """ divide main content of the XML file into chunks """

    # prepare a container for each chunk
    part_xml = etree.Element('utwor')
    etree.SubElement(part_xml, 'master')
    main_xml_part = part_xml[0]  # master

    last_node_part = False

    # the loop below is a workaround for a problem with epubs of dramas
    # which have scenes, but no acts
    is_scene = False
    is_act = False
    for one_part in main_text:
        name = one_part.tag
        if name == 'naglowek_scena':
            is_scene = True
        elif name == 'naglowek_akt':
            is_act = True

    for one_part in main_text:
        name = one_part.tag
        if is_act is False and is_scene is True:
            if name == 'naglowek_czesc':
                yield part_xml
                last_node_part = True
                main_xml_part[:] = [deepcopy(one_part)]
            elif not last_node_part and name == "naglowek_scena":
                yield part_xml
                main_xml_part[:] = [deepcopy(one_part)]
            else:
                main_xml_part.append(deepcopy(one_part))
                last_node_part = False
        else:
            if name == 'naglowek_czesc':
                yield part_xml
                last_node_part = True
                main_xml_part[:] = [deepcopy(one_part)]
            elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
                yield part_xml
                main_xml_part[:] = [deepcopy(one_part)]
            else:
                main_xml_part.append(deepcopy(one_part))
                last_node_part = False
    yield part_xml

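# chop() yields the same <utwor><master>...</master></utwor> container over
# and over with its contents swapped out, so each chunk has to be processed
# before the next one is requested -- roughly as transform_file() does below
# (main_text and annotations stand for the variables used there):
#
#     for i, chunk_xml in enumerate(chop(main_text), start=1):
#         html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, i, annotations)
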
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ transforms one chunk, returns an HTML string, a TOC object and a set of used characters """

    # _empty_html_static is deliberately a mutable default: it caches the
    # empty-chunk template between calls.
    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars

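# A single-chunk sketch: `annotations` collects footnotes as a side effect,
# and the returned character set later feeds the font subsetter
# (chunk_xml stands for one container produced by chop()):
#
#     annotations = etree.Element('annotations')
#     html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, 1, annotations)
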
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path=''):
    """ produces an EPUB file

    sample=n: generate a sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    """
    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to its starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
            chars = used_chars(html_tree.getroot())
            zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                html_string = open(get_resource('epub/emptyChunk.html')).read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = open(get_resource('epub/support.html')).read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # subset the fonts to the characters actually used
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
        for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
            optimizer_call = ['perl', 'subset.pl', '--chars',
                              ''.join(chars).encode('utf-8'),
                              get_resource('fonts/' + fname),
                              os.path.join(tmpdir, fname)]
            if verbose:
                print "Running font-optimizer"
                subprocess.check_call(optimizer_call)
            else:
                subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        rmtree(tmpdir)
        if cwd is not None:
            os.chdir(cwd)

    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()

    return OutputFile.from_filename(output_file.name)
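

# A typical call, assuming a WLDocument loaded elsewhere (e.g. with
# librarian.parser.WLDocument.from_file); save_as() on the returned
# OutputFile is an assumption based on how other Librarian writers are used.
#
#     epub = transform(wldoc, verbose=True, html_toc=True, cover=True)
#     epub.save_as('output.epub')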