librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from lxml import etree
  15 import zipfile
  16 from tempfile import mkdtemp, NamedTemporaryFile
  17 from shutil import rmtree
  18
  19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  20 from librarian.cover import DefaultEbookCover
  21
  22 from librarian import functions, get_resource
  23
  24 from librarian.hyphenator import Hyphenator
  25
# Module-level registration calls, executed once when this module is imported.
# NOTE(review): presumably these register the person-name and 3-to-2 letter
# language-code helpers for use from XPath/XSLT -- confirm in
# librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  28
  29 def set_hyph_language(source_tree):
  30     def get_short_lng_code(text):
  31         result = ''
  32         text = ''.join(text)
  33         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  34             for line in f:
  35                 list = line.strip().split('|')
  36                 if list[0] == text:
  37                     result=list[2]
  38         if result == '':
  39             return text
  40         else:
  41             return result
  42     bibl_lng = etree.XPath('//dc:language//text()', namespaces = {'dc':str(DCNS)})(source_tree)
  43     short_lng = get_short_lng_code(bibl_lng[0])
  44     try:
  45         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' + short_lng + '.dic'))
  46     except:
  47         pass
  48
  49 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  50     if hyph is not None:
  51         texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  52         for t in texts:
  53             parent = t.getparent()
  54             newt = ''
  55             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  56             for w in wlist:
  57                 newt += hyph.inserted(w, u'\u00AD')
  58             newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  59             if t.is_text:
  60                 parent.text = newt
  61             elif t.is_tail:
  62                 parent.tail = newt
  63
  64 def inner_xml(node):
  65     """ returns node's text and children as a string
  66
  67     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  68     x<b>y</b>z
  69     """
  70
  71     nt = node.text if node.text is not None else ''
  72     return ''.join([nt] + [etree.tostring(child) for child in node])
  73
  74 def set_inner_xml(node, text):
  75     """ sets node's text and children from a string
  76
  77     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  78     >>> set_inner_xml(e, 'x<b>y</b>z')
  79     >>> print etree.tostring(e)
  80     <a>x<b>y</b>z</a>
  81     """
  82
  83     p = etree.fromstring('<x>%s</x>' % text)
  84     node.text = p.text
  85     node[:] = p[:]
  86
  87
  88 def node_name(node):
  89     """ Find out a node's name
  90
  91     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
  92     XYZ
  93     """
  94
  95     tempnode = deepcopy(node)
  96
  97     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
  98         for e in tempnode.findall('.//%s' % p):
  99             t = e.tail
 100             e.clear()
 101             e.tail = t
 102     etree.strip_tags(tempnode, '*')
 103     return tempnode.text
 104
 105
 106 def xslt(xml, sheet):
 107     if isinstance(xml, etree._Element):
 108         xml = etree.ElementTree(xml)
 109     with open(sheet) as xsltf:
 110         return xml.xslt(etree.parse(xsltf))
 111
 112
 113 def replace_characters(node):
 114     def replace_chars(text):
 115         if text is None:
 116             return None
 117         return text.replace(u"\ufeff", u"")\
 118                    .replace("---", u"\u2014")\
 119                    .replace("--", u"\u2013")\
 120                    .replace(",,", u"\u201E")\
 121                    .replace('"', u"\u201D")\
 122                    .replace("'", u"\u2019")
 123     if node.tag in ('uwaga', 'extra'):
 124         t = node.tail
 125         node.clear()
 126         node.tail = t
 127     node.text = replace_chars(node.text)
 128     node.tail = replace_chars(node.tail)
 129     for child in node:
 130         replace_characters(child)
 131
 132
 133 def find_annotations(annotations, source, part_no):
 134     for child in source:
 135         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 136             annotation = deepcopy(child)
 137             number = str(len(annotations)+1)
 138             annotation.set('number', number)
 139             annotation.set('part', str(part_no))
 140             annotation.tail = ''
 141             annotations.append(annotation)
 142             tail = child.tail
 143             child.clear()
 144             child.tail = tail
 145             child.text = number
 146         if child.tag not in ('extra', 'uwaga'):
 147             find_annotations(annotations, child, part_no)
 148
 149
 150 class Stanza(object):
 151     """
 152     Converts / verse endings into verse elements in a stanza.
 153
 154     Slashes may only occur directly in the stanza. Any slashes in subelements
 155     will be ignored, and the subelements will be put inside verse elements.
 156
 157     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 158     >>> Stanza(s).versify()
 159     >>> print etree.tostring(s)
 160     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 161     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 162
 163     """
 164     def __init__(self, stanza_elem):
 165         self.stanza = stanza_elem
 166         self.verses = []
 167         self.open_verse = None
 168
 169     def versify(self):
 170         self.push_text(self.stanza.text)
 171         for elem in self.stanza:
 172             self.push_elem(elem)
 173             self.push_text(elem.tail)
 174         tail = self.stanza.tail
 175         self.stanza.clear()
 176         self.stanza.tail = tail
 177         self.stanza.extend(self.verses)
 178
 179     def open_normal_verse(self):
 180         self.open_verse = self.stanza.makeelement("wers_normalny")
 181         self.verses.append(self.open_verse)
 182
 183     def get_open_verse(self):
 184         if self.open_verse is None:
 185             self.open_normal_verse()
 186         return self.open_verse
 187
 188     def push_text(self, text):
 189         if not text:
 190             return
 191         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 192             if i:
 193                 self.open_normal_verse()
 194             verse = self.get_open_verse()
 195             if len(verse):
 196                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 197             else:
 198                 verse.text = (verse.text or "") + verse_text
 199
 200     def push_elem(self, elem):
 201         if elem.tag.startswith("wers"):
 202             verse = deepcopy(elem)
 203             verse.tail = None
 204             self.verses.append(verse)
 205             self.open_verse = verse
 206         else:
 207             appended = deepcopy(elem)
 208             appended.tail = None
 209             self.get_open_verse().append(appended)
 210
 211
 212 def replace_by_verse(tree):
 213     """ Find stanzas and create new verses in place of a '/' character """
 214
 215     stanzas = tree.findall('.//' + WLNS('strofa'))
 216     for stanza in stanzas:
 217         Stanza(stanza).versify()
 218
 219
 220 def add_to_manifest(manifest, partno):
 221     """ Adds a node to the manifest section in content.opf file """
 222
 223     partstr = 'part%d' % partno
 224     e = manifest.makeelement(OPFNS('item'), attrib={
 225                                  'id': partstr,
 226                                  'href': partstr + '.html',
 227                                  'media-type': 'application/xhtml+xml',
 228                              })
 229     manifest.append(e)
 230
 231
 232 def add_to_spine(spine, partno):
 233     """ Adds a node to the spine section in content.opf file """
 234
 235     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno});
 236     spine.append(e)
 237
 238
 239 class TOC(object):
 240     def __init__(self, name=None, part_href=None):
 241         self.children = []
 242         self.name = name
 243         self.part_href = part_href
 244         self.sub_number = None
 245
 246     def add(self, name, part_href, level=0, is_part=True, index=None):
 247         assert level == 0 or index is None
 248         if level > 0 and self.children:
 249             return self.children[-1].add(name, part_href, level-1, is_part)
 250         else:
 251             t = TOC(name)
 252             t.part_href = part_href
 253             if index is not None:
 254                 self.children.insert(index, t)
 255             else:
 256                 self.children.append(t)
 257             if not is_part:
 258                 t.sub_number = len(self.children) + 1
 259                 return t.sub_number
 260
 261     def append(self, toc):
 262         self.children.append(toc)
 263
 264     def extend(self, toc):
 265         self.children.extend(toc.children)
 266
 267     def depth(self):
 268         if self.children:
 269             return max((c.depth() for c in self.children)) + 1
 270         else:
 271             return 0
 272
 273     def href(self):
 274         src = self.part_href
 275         if self.sub_number is not None:
 276             src += '#sub%d' % self.sub_number
 277         return src
 278
 279     def write_to_xml(self, nav_map, counter=1):
 280         for child in self.children:
 281             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 282             nav_point.set('id', 'NavPoint-%d' % counter)
 283             nav_point.set('playOrder', str(counter))
 284
 285             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 286             text = nav_map.makeelement(NCXNS('text'))
 287             if child.name is not None:
 288                 text.text = re.sub(r'\n', ' ', child.name)
 289             else:
 290                 text.text = child.name
 291             nav_label.append(text)
 292             nav_point.append(nav_label)
 293
 294             content = nav_map.makeelement(NCXNS('content'))
 295             content.set('src', child.href())
 296             nav_point.append(content)
 297             nav_map.append(nav_point)
 298             counter = child.write_to_xml(nav_point, counter + 1)
 299         return counter
 300
 301     def html_part(self, depth=0):
 302         texts = []
 303         for child in self.children:
 304             texts.append(
 305                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 306                 (depth, child.href(), child.name))
 307             texts.append(child.html_part(depth+1))
 308         return "\n".join(texts)
 309
 310     def html(self):
 311         with open(get_resource('epub/toc.html')) as f:
 312             t = unicode(f.read(), 'utf-8')
 313         return t % self.html_part()
 314
 315
 316 def used_chars(element):
 317     """ Lists characters used in an ETree Element """
 318     chars = set((element.text or '') + (element.tail or ''))
 319     for child in element:
 320         chars = chars.union(used_chars(child))
 321     return chars
 322
 323
 324 def chop(main_text):
 325     """ divide main content of the XML file into chunks """
 326
 327     # prepare a container for each chunk
 328     part_xml = etree.Element('utwor')
 329     etree.SubElement(part_xml, 'master')
 330     main_xml_part = part_xml[0] # master
 331
 332     last_node_part = False
 333
 334     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 335     is_scene = False
 336     is_act = False
 337     for one_part in main_text:
 338         name = one_part.tag
 339         if name == 'naglowek_scena':
 340             is_scene = True
 341         elif name == 'naglowek_akt':
 342             is_act = True
 343
 344     for one_part in main_text:
 345         name = one_part.tag
 346         if is_act is False and is_scene is True:
 347             if name == 'naglowek_czesc':
 348                 yield part_xml
 349                 last_node_part = True
 350                 main_xml_part[:] = [deepcopy(one_part)]
 351             elif not last_node_part and name == "naglowek_scena":
 352                 yield part_xml
 353                 main_xml_part[:] = [deepcopy(one_part)]
 354             else:
 355                 main_xml_part.append(deepcopy(one_part))
 356                 last_node_part = False
 357         else:
 358             if name == 'naglowek_czesc':
 359                 yield part_xml
 360                 last_node_part = True
 361                 main_xml_part[:] = [deepcopy(one_part)]
 362             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 363                 yield part_xml
 364                 main_xml_part[:] = [deepcopy(one_part)]
 365             else:
 366                 main_xml_part.append(deepcopy(one_part))
 367                 last_node_part = False
 368     yield part_xml
 369
 370
 371 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 372     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 373
 374     toc = TOC()
 375     for element in chunk_xml[0]:
 376         if element.tag == "naglowek_czesc":
 377             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 378         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 379             toc.add(node_name(element), "part%d.html" % chunk_no)
 380         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 381             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 382             element.set('sub', str(subnumber))
 383     if empty:
 384         if not _empty_html_static:
 385             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 386         chars = set()
 387         output_html = _empty_html_static[0]
 388     else:
 389         find_annotations(annotations, chunk_xml, chunk_no)
 390         replace_by_verse(chunk_xml)
 391         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 392         chars = used_chars(html_tree.getroot())
 393         output_html = etree.tostring(html_tree, pretty_print = True,
 394                     xml_declaration = True,
 395                     encoding = "utf-8",
 396                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 397                             '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">')
 398     return output_html, toc, chars
 399
 400
 401 def transform(wldoc, verbose=False,
 402               style=None, html_toc=False,
 403               sample=None, cover=None, flags=None):
 404     """ produces a EPUB file
 405
 406     sample=n: generate sample e-book (with at least n paragraphs)
 407     cover: a cover.Cover factory or True for default
 408     flags: less-advertising, without-fonts, working-copy, with-full-fonts
 409     """
 410
 411     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 412         """ processes one input file and proceeds to its children """
 413
 414         replace_characters(wldoc.edoc.getroot())
 415
 416         hyphenator = set_hyph_language(wldoc.edoc.getroot())
 417         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 418
 419
 420         # every input file will have a TOC entry,
 421         # pointing to starting chunk
 422         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 423         chars = set()
 424         if first:
 425             # write book title page
 426             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
 427             chars = used_chars(html_tree.getroot())
 428             zip.writestr('OPS/title.html',
 429                  etree.tostring(html_tree, pretty_print = True,
 430                         xml_declaration = True,
 431                     encoding = "utf-8",
 432                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 433                             '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
 434             # add a title page TOC entry
 435             toc.add(u"Strona tytułowa", "title.html")
 436         elif wldoc.book_info.parts:
 437             # write title page for every parent
 438             if sample is not None and sample <= 0:
 439                 chars = set()
 440                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 441             else:
 442                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 443                 chars = used_chars(html_tree.getroot())
 444                 html_string = etree.tostring(html_tree,
 445                         pretty_print = True,
 446                     xml_declaration = True,
 447                     encoding = "utf-8",
 448                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 449                             '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">')
 450             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
 451             add_to_manifest(manifest, chunk_counter)
 452             add_to_spine(spine, chunk_counter)
 453             chunk_counter += 1
 454
 455         if len(wldoc.edoc.getroot()) > 1:
 456             # rdf before style master
 457             main_text = wldoc.edoc.getroot()[1]
 458         else:
 459             # rdf in style master
 460             main_text = wldoc.edoc.getroot()[0]
 461             if main_text.tag == RDFNS('RDF'):
 462                 main_text = None
 463
 464         if main_text is not None:
 465             for chunk_xml in chop(main_text):
 466                 empty = False
 467                 if sample is not None:
 468                     if sample <= 0:
 469                         empty = True
 470                     else:
 471                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 472                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 473
 474                 toc.extend(chunk_toc)
 475                 chars = chars.union(chunk_chars)
 476                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
 477                 add_to_manifest(manifest, chunk_counter)
 478                 add_to_spine(spine, chunk_counter)
 479                 chunk_counter += 1
 480
 481         for child in wldoc.parts():
 482             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 483                 child, chunk_counter, first=False, sample=sample)
 484             toc.append(child_toc)
 485             chars = chars.union(chunk_chars)
 486
 487         return toc, chunk_counter, chars, sample
 488
 489
 490     document = deepcopy(wldoc)
 491     del wldoc
 492
 493     if flags:
 494         for flag in flags:
 495             document.edoc.getroot().set(flag, 'yes')
 496
 497     # add editors info
 498     document.edoc.getroot().set('editors', u', '.join(sorted(
 499         editor.readable() for editor in document.editors())))
 500     if document.book_info.funders:
 501         document.edoc.getroot().set('funders', u', '.join(
 502             document.book_info.funders))
 503     if document.book_info.thanks:
 504         document.edoc.getroot().set('thanks', document.book_info.thanks)
 505
 506     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 507     manifest = opf.find('.//' + OPFNS('manifest'))
 508     guide = opf.find('.//' + OPFNS('guide'))
 509     spine = opf.find('.//' + OPFNS('spine'))
 510
 511     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 512     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 513
 514     # write static elements
 515     mime = zipfile.ZipInfo()
 516     mime.filename = 'mimetype'
 517     mime.compress_type = zipfile.ZIP_STORED
 518     mime.extra = ''
 519     zip.writestr(mime, 'application/epub+zip')
 520     zip.writestr('META-INF/container.xml', '<?xml version="1.0" ?><container version="1.0" ' \
 521                        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">' \
 522                        '<rootfiles><rootfile full-path="OPS/content.opf" ' \
 523                        'media-type="application/oebps-package+xml" />' \
 524                        '</rootfiles></container>')
 525     zip.write(get_resource('res/wl-logo-small.png'), os.path.join('OPS', 'logo_wolnelektury.png'))
 526     zip.write(get_resource('res/jedenprocent.png'), os.path.join('OPS', 'jedenprocent.png'))
 527     if not style:
 528         style = get_resource('epub/style.css')
 529     zip.write(style, os.path.join('OPS', 'style.css'))
 530
 531     if cover:
 532         if cover is True:
 533             cover = DefaultEbookCover
 534
 535         cover_file = StringIO()
 536         bound_cover = cover(document.book_info)
 537         bound_cover.save(cover_file)
 538         cover_name = 'cover.%s' % bound_cover.ext()
 539         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 540         del cover_file
 541
 542         cover_tree = etree.parse(get_resource('epub/cover.html'))
 543         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 544         zip.writestr('OPS/cover.html', etree.tostring(
 545                         cover_tree, pretty_print = True, xml_declaration = True, encoding = "utf-8",
 546                         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 547                             '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
 548
 549         if bound_cover.uses_dc_cover:
 550             if document.book_info.cover_by:
 551                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 552             if document.book_info.cover_source:
 553                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 554
 555         manifest.append(etree.fromstring(
 556             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 557         manifest.append(etree.fromstring(
 558             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 559         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 560         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 561         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 562
 563
 564     annotations = etree.Element('annotations')
 565
 566     toc_file = etree.fromstring('<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC ' \
 567                                '"-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">' \
 568                                '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" ' \
 569                                'version="2005-1"><head></head><docTitle></docTitle><navMap>' \
 570                                '</navMap></ncx>')
 571     nav_map = toc_file[-1]
 572
 573     if html_toc:
 574         manifest.append(etree.fromstring(
 575             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 576         spine.append(etree.fromstring(
 577             '<itemref idref="html_toc" />'))
 578         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 579
 580     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 581
 582     if len(toc.children) < 2:
 583         toc.add(u"Początek utworu", "part1.html")
 584
 585     # Last modifications in container files and EPUB creation
 586     if len(annotations) > 0:
 587         toc.add("Przypisy", "annotations.html")
 588         manifest.append(etree.fromstring(
 589             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 590         spine.append(etree.fromstring(
 591             '<itemref idref="annotations" />'))
 592         replace_by_verse(annotations)
 593         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 594         chars = chars.union(used_chars(html_tree.getroot()))
 595         zip.writestr('OPS/annotations.html', etree.tostring(
 596                             html_tree, pretty_print = True,
 597                                 xml_declaration = True,
 598                                 encoding = "utf-8",
 599                                 doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 600                             '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
 601
 602     toc.add("Wesprzyj Wolne Lektury", "support.html")
 603     manifest.append(etree.fromstring(
 604         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 605     spine.append(etree.fromstring(
 606         '<itemref idref="support" />'))
 607     html_string = open(get_resource('epub/support.html')).read()
 608     chars.update(used_chars(etree.fromstring(html_string)))
 609     zip.writestr('OPS/support.html', html_string)
 610
 611     toc.add("Strona redakcyjna", "last.html")
 612     manifest.append(etree.fromstring(
 613         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 614     spine.append(etree.fromstring(
 615         '<itemref idref="last" />'))
 616     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
 617     chars.update(used_chars(html_tree.getroot()))
 618     zip.writestr('OPS/last.html', etree.tostring(
 619                         html_tree, pretty_print = True,
 620                         xml_declaration = True,
 621                         encoding = "utf-8",
 622                         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 623                             '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'))
 624
 625     if not flags or not 'without-fonts' in flags:
 626         # strip fonts
 627         tmpdir = mkdtemp('-librarian-epub')
 628         try:
 629             cwd = os.getcwd()
 630         except OSError:
 631             cwd = None
 632
 633         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 634         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 635             if not flags or not 'with-full-fonts' in flags:
 636                 optimizer_call = ['perl', 'subset.pl', '--chars', ''.join(chars).encode('utf-8'),
 637                               get_resource('fonts/' + fname), os.path.join(tmpdir, fname)]
 638                 if verbose:
 639                     print "Running font-optimizer"
 640                     subprocess.check_call(optimizer_call)
 641                 else:
 642                     subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 643                     zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 644             else:
 645                 zip.write(get_resource('fonts/' + fname), os.path.join('OPS', fname))
 646             manifest.append(etree.fromstring(
 647                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 648         rmtree(tmpdir)
 649         if cwd is not None:
 650             os.chdir(cwd)
 651     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print = True,
 652                     xml_declaration = True,
 653                     encoding = "utf-8"))
 654     title = document.book_info.title
 655     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 656     for st in attributes:
 657         meta = toc_file.makeelement(NCXNS('meta'))
 658         meta.set('name', st)
 659         meta.set('content', '0')
 660         toc_file[0].append(meta)
 661     toc_file[0][0].set('content', str(document.book_info.url))
 662     toc_file[0][1].set('content', str(toc.depth()))
 663     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 664
 665     # write TOC
 666     if html_toc:
 667         toc.add(u"Spis treści", "toc.html", index=1)
 668         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 669     toc.write_to_xml(nav_map)
 670     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print = True,
 671                     xml_declaration = True,
 672                     encoding = "utf-8"))
 673     zip.close()
 674
 675     return OutputFile.from_filename(output_file.name)