librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from lxml import etree
  15 import zipfile
  16 from tempfile import mkdtemp, NamedTemporaryFile
  17 from shutil import rmtree
  18
  19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  20 from librarian.cover import DefaultEbookCover
  21
  22 from librarian import functions, get_resource
  23
  24 from librarian.hyphenator import Hyphenator
  25
# Import-time side effect: invoke the registration hooks exposed by
# librarian.functions.  NOTE(review): presumably these register the extension
# functions the EPUB stylesheets rely on -- confirm in librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  28
  29
  30 def set_hyph_language(source_tree):
  31     def get_short_lng_code(text):
  32         result = ''
  33         text = ''.join(text)
  34         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  35             for line in f:
  36                 list = line.strip().split('|')
  37                 if list[0] == text:
  38                     result = list[2]
  39         if result == '':
  40             return text
  41         else:
  42             return result
  43     bibl_lng = etree.XPath('//dc:language//text()',
  44                            namespaces={'dc': str(DCNS)})(source_tree)
  45     short_lng = get_short_lng_code(bibl_lng[0])
  46     try:
  47         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  48                                        short_lng + '.dic'))
  49     except:
  50         pass
  51
  52
  53 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  54     if hyph is not None:
  55         texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  56         for t in texts:
  57             parent = t.getparent()
  58             newt = ''
  59             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  60             for w in wlist:
  61                 newt += hyph.inserted(w, u'\u00AD')
  62             newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  63             if t.is_text:
  64                 parent.text = newt
  65             elif t.is_tail:
  66                 parent.tail = newt
  67
  68
  69 def inner_xml(node):
  70     """ returns node's text and children as a string
  71
  72     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  73     x<b>y</b>z
  74     """
  75
  76     nt = node.text if node.text is not None else ''
  77     return ''.join([nt] + [etree.tostring(child) for child in node])
  78
  79
  80 def set_inner_xml(node, text):
  81     """ sets node's text and children from a string
  82
  83     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  84     >>> set_inner_xml(e, 'x<b>y</b>z')
  85     >>> print etree.tostring(e)
  86     <a>x<b>y</b>z</a>
  87     """
  88
  89     p = etree.fromstring('<x>%s</x>' % text)
  90     node.text = p.text
  91     node[:] = p[:]
  92
  93
  94 def node_name(node):
  95     """ Find out a node's name
  96
  97     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
  98     XYZ
  99     """
 100
 101     tempnode = deepcopy(node)
 102
 103     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 104         for e in tempnode.findall('.//%s' % p):
 105             t = e.tail
 106             e.clear()
 107             e.tail = t
 108     etree.strip_tags(tempnode, '*')
 109     return tempnode.text
 110
 111
 112 def xslt(xml, sheet):
 113     if isinstance(xml, etree._Element):
 114         xml = etree.ElementTree(xml)
 115     with open(sheet) as xsltf:
 116         return xml.xslt(etree.parse(xsltf))
 117
 118
 119 def replace_characters(node):
 120     def replace_chars(text):
 121         if text is None:
 122             return None
 123         return text.replace(u"\ufeff", u"")\
 124                    .replace("---", u"\u2014")\
 125                    .replace("--", u"\u2013")\
 126                    .replace(",,", u"\u201E")\
 127                    .replace('"', u"\u201D")\
 128                    .replace("'", u"\u2019")
 129     if node.tag in ('uwaga', 'extra'):
 130         t = node.tail
 131         node.clear()
 132         node.tail = t
 133     node.text = replace_chars(node.text)
 134     node.tail = replace_chars(node.tail)
 135     for child in node:
 136         replace_characters(child)
 137
 138
 139 def find_annotations(annotations, source, part_no):
 140     for child in source:
 141         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 142             annotation = deepcopy(child)
 143             number = str(len(annotations) + 1)
 144             annotation.set('number', number)
 145             annotation.set('part', str(part_no))
 146             annotation.tail = ''
 147             annotations.append(annotation)
 148             tail = child.tail
 149             child.clear()
 150             child.tail = tail
 151             child.text = number
 152         if child.tag not in ('extra', 'uwaga'):
 153             find_annotations(annotations, child, part_no)
 154
 155
 156 class Stanza(object):
 157     """
 158     Converts / verse endings into verse elements in a stanza.
 159
 160     Slashes may only occur directly in the stanza. Any slashes in subelements
 161     will be ignored, and the subelements will be put inside verse elements.
 162
 163     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 164     >>> Stanza(s).versify()
 165     >>> print etree.tostring(s)
 166     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 167     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 168
 169     """
 170     def __init__(self, stanza_elem):
 171         self.stanza = stanza_elem
 172         self.verses = []
 173         self.open_verse = None
 174
 175     def versify(self):
 176         self.push_text(self.stanza.text)
 177         for elem in self.stanza:
 178             self.push_elem(elem)
 179             self.push_text(elem.tail)
 180         tail = self.stanza.tail
 181         self.stanza.clear()
 182         self.stanza.tail = tail
 183         self.stanza.extend(self.verses)
 184
 185     def open_normal_verse(self):
 186         self.open_verse = self.stanza.makeelement("wers_normalny")
 187         self.verses.append(self.open_verse)
 188
 189     def get_open_verse(self):
 190         if self.open_verse is None:
 191             self.open_normal_verse()
 192         return self.open_verse
 193
 194     def push_text(self, text):
 195         if not text:
 196             return
 197         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 198             if i:
 199                 self.open_normal_verse()
 200             verse = self.get_open_verse()
 201             if len(verse):
 202                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 203             else:
 204                 verse.text = (verse.text or "") + verse_text
 205
 206     def push_elem(self, elem):
 207         if elem.tag.startswith("wers"):
 208             verse = deepcopy(elem)
 209             verse.tail = None
 210             self.verses.append(verse)
 211             self.open_verse = verse
 212         else:
 213             appended = deepcopy(elem)
 214             appended.tail = None
 215             self.get_open_verse().append(appended)
 216
 217
 218 def replace_by_verse(tree):
 219     """ Find stanzas and create new verses in place of a '/' character """
 220
 221     stanzas = tree.findall('.//' + WLNS('strofa'))
 222     for stanza in stanzas:
 223         Stanza(stanza).versify()
 224
 225
 226 def add_to_manifest(manifest, partno):
 227     """ Adds a node to the manifest section in content.opf file """
 228
 229     partstr = 'part%d' % partno
 230     e = manifest.makeelement(
 231         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 232                                'media-type': 'application/xhtml+xml'}
 233     )
 234     manifest.append(e)
 235
 236
 237 def add_to_spine(spine, partno):
 238     """ Adds a node to the spine section in content.opf file """
 239
 240     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 241     spine.append(e)
 242
 243
 244 class TOC(object):
 245     def __init__(self, name=None, part_href=None):
 246         self.children = []
 247         self.name = name
 248         self.part_href = part_href
 249         self.sub_number = None
 250
 251     def add(self, name, part_href, level=0, is_part=True, index=None):
 252         assert level == 0 or index is None
 253         if level > 0 and self.children:
 254             return self.children[-1].add(name, part_href, level - 1, is_part)
 255         else:
 256             t = TOC(name)
 257             t.part_href = part_href
 258             if index is not None:
 259                 self.children.insert(index, t)
 260             else:
 261                 self.children.append(t)
 262             if not is_part:
 263                 t.sub_number = len(self.children) + 1
 264                 return t.sub_number
 265
 266     def append(self, toc):
 267         self.children.append(toc)
 268
 269     def extend(self, toc):
 270         self.children.extend(toc.children)
 271
 272     def depth(self):
 273         if self.children:
 274             return max((c.depth() for c in self.children)) + 1
 275         else:
 276             return 0
 277
 278     def href(self):
 279         src = self.part_href
 280         if self.sub_number is not None:
 281             src += '#sub%d' % self.sub_number
 282         return src
 283
 284     def write_to_xml(self, nav_map, counter=1):
 285         for child in self.children:
 286             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 287             nav_point.set('id', 'NavPoint-%d' % counter)
 288             nav_point.set('playOrder', str(counter))
 289
 290             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 291             text = nav_map.makeelement(NCXNS('text'))
 292             if child.name is not None:
 293                 text.text = re.sub(r'\n', ' ', child.name)
 294             else:
 295                 text.text = child.name
 296             nav_label.append(text)
 297             nav_point.append(nav_label)
 298
 299             content = nav_map.makeelement(NCXNS('content'))
 300             content.set('src', child.href())
 301             nav_point.append(content)
 302             nav_map.append(nav_point)
 303             counter = child.write_to_xml(nav_point, counter + 1)
 304         return counter
 305
 306     def html_part(self, depth=0):
 307         texts = []
 308         for child in self.children:
 309             texts.append(
 310                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 311                 (depth, child.href(), child.name))
 312             texts.append(child.html_part(depth + 1))
 313         return "\n".join(texts)
 314
 315     def html(self):
 316         with open(get_resource('epub/toc.html')) as f:
 317             t = unicode(f.read(), 'utf-8')
 318         return t % self.html_part()
 319
 320
 321 def used_chars(element):
 322     """ Lists characters used in an ETree Element """
 323     chars = set((element.text or '') + (element.tail or ''))
 324     for child in element:
 325         chars = chars.union(used_chars(child))
 326     return chars
 327
 328
 329 def chop(main_text):
 330     """ divide main content of the XML file into chunks """
 331
 332     # prepare a container for each chunk
 333     part_xml = etree.Element('utwor')
 334     etree.SubElement(part_xml, 'master')
 335     main_xml_part = part_xml[0]  # master
 336
 337     last_node_part = False
 338
 339     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 340     is_scene = False
 341     is_act = False
 342     for one_part in main_text:
 343         name = one_part.tag
 344         if name == 'naglowek_scena':
 345             is_scene = True
 346         elif name == 'naglowek_akt':
 347             is_act = True
 348
 349     for one_part in main_text:
 350         name = one_part.tag
 351         if is_act is False and is_scene is True:
 352             if name == 'naglowek_czesc':
 353                 yield part_xml
 354                 last_node_part = True
 355                 main_xml_part[:] = [deepcopy(one_part)]
 356             elif not last_node_part and name == "naglowek_scena":
 357                 yield part_xml
 358                 main_xml_part[:] = [deepcopy(one_part)]
 359             else:
 360                 main_xml_part.append(deepcopy(one_part))
 361                 last_node_part = False
 362         else:
 363             if name == 'naglowek_czesc':
 364                 yield part_xml
 365                 last_node_part = True
 366                 main_xml_part[:] = [deepcopy(one_part)]
 367             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 368                 yield part_xml
 369                 main_xml_part[:] = [deepcopy(one_part)]
 370             else:
 371                 main_xml_part.append(deepcopy(one_part))
 372                 last_node_part = False
 373     yield part_xml
 374
 375
 376 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 377     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 378
 379     toc = TOC()
 380     for element in chunk_xml[0]:
 381         if element.tag == "naglowek_czesc":
 382             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 383         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 384             toc.add(node_name(element), "part%d.html" % chunk_no)
 385         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 386             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 387             element.set('sub', str(subnumber))
 388     if empty:
 389         if not _empty_html_static:
 390             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 391         chars = set()
 392         output_html = _empty_html_static[0]
 393     else:
 394         find_annotations(annotations, chunk_xml, chunk_no)
 395         replace_by_verse(chunk_xml)
 396         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 397         chars = used_chars(html_tree.getroot())
 398         output_html = etree.tostring(
 399             html_tree, pretty_print=True, xml_declaration=True,
 400             encoding="utf-8",
 401             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 402                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 403         )
 404     return output_html, toc, chars
 405
 406
 407 def transform(wldoc, verbose=False,
 408               style=None, html_toc=False,
 409               sample=None, cover=None, flags=None):
 410     """ produces a EPUB file
 411
 412     sample=n: generate sample e-book (with at least n paragraphs)
 413     cover: a cover.Cover factory or True for default
 414     flags: less-advertising, without-fonts, working-copy
 415     """
 416
 417     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 418         """ processes one input file and proceeds to its children """
 419
 420         replace_characters(wldoc.edoc.getroot())
 421
 422         hyphenator = set_hyph_language(wldoc.edoc.getroot())
 423         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 424
 425         # every input file will have a TOC entry,
 426         # pointing to starting chunk
 427         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 428         chars = set()
 429         if first:
 430             # write book title page
 431             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
 432             chars = used_chars(html_tree.getroot())
 433             zip.writestr(
 434                 'OPS/title.html',
 435                 etree.tostring(
 436                     html_tree, pretty_print=True, xml_declaration=True,
 437                     encoding="utf-8",
 438                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 439                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 440                 )
 441             )
 442             # add a title page TOC entry
 443             toc.add(u"Strona tytułowa", "title.html")
 444         elif wldoc.book_info.parts:
 445             # write title page for every parent
 446             if sample is not None and sample <= 0:
 447                 chars = set()
 448                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 449             else:
 450                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 451                 chars = used_chars(html_tree.getroot())
 452                 html_string = etree.tostring(
 453                     html_tree, pretty_print=True, xml_declaration=True,
 454                     encoding="utf-8",
 455                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 456                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 457                 )
 458             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
 459             add_to_manifest(manifest, chunk_counter)
 460             add_to_spine(spine, chunk_counter)
 461             chunk_counter += 1
 462
 463         if len(wldoc.edoc.getroot()) > 1:
 464             # rdf before style master
 465             main_text = wldoc.edoc.getroot()[1]
 466         else:
 467             # rdf in style master
 468             main_text = wldoc.edoc.getroot()[0]
 469             if main_text.tag == RDFNS('RDF'):
 470                 main_text = None
 471
 472         if main_text is not None:
 473             for chunk_xml in chop(main_text):
 474                 empty = False
 475                 if sample is not None:
 476                     if sample <= 0:
 477                         empty = True
 478                     else:
 479                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 480                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 481
 482                 toc.extend(chunk_toc)
 483                 chars = chars.union(chunk_chars)
 484                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
 485                 add_to_manifest(manifest, chunk_counter)
 486                 add_to_spine(spine, chunk_counter)
 487                 chunk_counter += 1
 488
 489         for child in wldoc.parts():
 490             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 491                 child, chunk_counter, first=False, sample=sample)
 492             toc.append(child_toc)
 493             chars = chars.union(chunk_chars)
 494
 495         return toc, chunk_counter, chars, sample
 496
 497     document = deepcopy(wldoc)
 498     del wldoc
 499
 500     if flags:
 501         for flag in flags:
 502             document.edoc.getroot().set(flag, 'yes')
 503
 504     document.clean_ed_note()
 505     document.clean_ed_note('abstrakt')
 506
 507     # add editors info
 508     editors = document.editors()
 509     if editors:
 510         document.edoc.getroot().set('editors', u', '.join(sorted(
 511             editor.readable() for editor in editors)))
 512     if document.book_info.funders:
 513         document.edoc.getroot().set('funders', u', '.join(
 514             document.book_info.funders))
 515     if document.book_info.thanks:
 516         document.edoc.getroot().set('thanks', document.book_info.thanks)
 517
 518     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 519     manifest = opf.find('.//' + OPFNS('manifest'))
 520     guide = opf.find('.//' + OPFNS('guide'))
 521     spine = opf.find('.//' + OPFNS('spine'))
 522
 523     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 524     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 525
 526     functions.reg_mathml_epub(zip)
 527
 528     # write static elements
 529     mime = zipfile.ZipInfo()
 530     mime.filename = 'mimetype'
 531     mime.compress_type = zipfile.ZIP_STORED
 532     mime.extra = ''
 533     zip.writestr(mime, 'application/epub+zip')
 534     zip.writestr(
 535         'META-INF/container.xml',
 536         '<?xml version="1.0" ?>'
 537         '<container version="1.0" '
 538         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 539         '<rootfiles><rootfile full-path="OPS/content.opf" '
 540         'media-type="application/oebps-package+xml" />'
 541         '</rootfiles></container>'
 542     )
 543     zip.write(get_resource('res/wl-logo-small.png'),
 544               os.path.join('OPS', 'logo_wolnelektury.png'))
 545     zip.write(get_resource('res/jedenprocent.png'),
 546               os.path.join('OPS', 'jedenprocent.png'))
 547     if not style:
 548         style = get_resource('epub/style.css')
 549     zip.write(style, os.path.join('OPS', 'style.css'))
 550
 551     if cover:
 552         if cover is True:
 553             cover = DefaultEbookCover
 554
 555         cover_file = StringIO()
 556         bound_cover = cover(document.book_info)
 557         bound_cover.save(cover_file)
 558         cover_name = 'cover.%s' % bound_cover.ext()
 559         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 560         del cover_file
 561
 562         cover_tree = etree.parse(get_resource('epub/cover.html'))
 563         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 564         zip.writestr('OPS/cover.html', etree.tostring(
 565             cover_tree, pretty_print=True, xml_declaration=True,
 566             encoding="utf-8",
 567             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 568                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 569         ))
 570
 571         if bound_cover.uses_dc_cover:
 572             if document.book_info.cover_by:
 573                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 574             if document.book_info.cover_source:
 575                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 576
 577         manifest.append(etree.fromstring(
 578             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 579         manifest.append(etree.fromstring(
 580             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 581         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 582         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 583         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 584
 585     annotations = etree.Element('annotations')
 586
 587     toc_file = etree.fromstring(
 588         '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 589         '"-//NISO//DTD ncx 2005-1//EN" '
 590         '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 591         '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 592         'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 593         '</navMap></ncx>'
 594     )
 595     nav_map = toc_file[-1]
 596
 597     if html_toc:
 598         manifest.append(etree.fromstring(
 599             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 600         spine.append(etree.fromstring(
 601             '<itemref idref="html_toc" />'))
 602         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 603
 604     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 605
 606     if len(toc.children) < 2:
 607         toc.add(u"Początek utworu", "part1.html")
 608
 609     # Last modifications in container files and EPUB creation
 610     if len(annotations) > 0:
 611         toc.add("Przypisy", "annotations.html")
 612         manifest.append(etree.fromstring(
 613             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 614         spine.append(etree.fromstring(
 615             '<itemref idref="annotations" />'))
 616         replace_by_verse(annotations)
 617         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 618         chars = chars.union(used_chars(html_tree.getroot()))
 619         zip.writestr('OPS/annotations.html', etree.tostring(
 620             html_tree, pretty_print=True, xml_declaration=True,
 621             encoding="utf-8",
 622             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 623                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 624         ))
 625
 626     toc.add("Wesprzyj Wolne Lektury", "support.html")
 627     manifest.append(etree.fromstring(
 628         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 629     spine.append(etree.fromstring(
 630         '<itemref idref="support" />'))
 631     html_string = open(get_resource('epub/support.html')).read()
 632     chars.update(used_chars(etree.fromstring(html_string)))
 633     zip.writestr('OPS/support.html', html_string)
 634
 635     toc.add("Strona redakcyjna", "last.html")
 636     manifest.append(etree.fromstring(
 637         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 638     spine.append(etree.fromstring(
 639         '<itemref idref="last" />'))
 640     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
 641     chars.update(used_chars(html_tree.getroot()))
 642     zip.writestr('OPS/last.html', etree.tostring(
 643         html_tree, pretty_print=True, xml_declaration=True,
 644         encoding="utf-8",
 645         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 646                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 647     ))
 648
 649     if not flags or 'without-fonts' not in flags:
 650         # strip fonts
 651         tmpdir = mkdtemp('-librarian-epub')
 652         try:
 653             cwd = os.getcwd()
 654         except OSError:
 655             cwd = None
 656
 657         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 658         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 659             optimizer_call = ['perl', 'subset.pl', '--chars',
 660                               ''.join(chars).encode('utf-8'),
 661                               get_resource('fonts/' + fname),
 662                               os.path.join(tmpdir, fname)]
 663             if verbose:
 664                 print "Running font-optimizer"
 665                 subprocess.check_call(optimizer_call)
 666             else:
 667                 subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 668             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 669             manifest.append(etree.fromstring(
 670                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 671         rmtree(tmpdir)
 672         if cwd is not None:
 673             os.chdir(cwd)
 674     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 675                  xml_declaration=True, encoding="utf-8"))
 676     title = document.book_info.title
 677     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 678     for st in attributes:
 679         meta = toc_file.makeelement(NCXNS('meta'))
 680         meta.set('name', st)
 681         meta.set('content', '0')
 682         toc_file[0].append(meta)
 683     toc_file[0][0].set('content', str(document.book_info.url))
 684     toc_file[0][1].set('content', str(toc.depth()))
 685     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 686
 687     # write TOC
 688     if html_toc:
 689         toc.add(u"Spis treści", "toc.html", index=1)
 690         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 691     toc.write_to_xml(nav_map)
 692     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 693                  xml_declaration=True, encoding="utf-8"))
 694     zip.close()
 695
 696     return OutputFile.from_filename(output_file.name)