librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from lxml import etree
  15 import zipfile
  16 from tempfile import mkdtemp, NamedTemporaryFile
  17 from shutil import rmtree
  18
  19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  20 from librarian.cover import DefaultEbookCover
  21
  22 from librarian import functions, get_resource
  23
  24 from librarian.hyphenator import Hyphenator
  25
# Import-time side effect: call two registration hooks from
# librarian.functions.
# NOTE(review): presumably these register the person-name and the
# 3-letter-to-2-letter language code helpers as XPath/XSLT extension functions
# used by the stylesheets applied below -- confirm in librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  28
  29
  30 def set_hyph_language(source_tree):
  31     def get_short_lng_code(text):
  32         result = ''
  33         text = ''.join(text)
  34         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  35             for line in f:
  36                 list = line.strip().split('|')
  37                 if list[0] == text:
  38                     result = list[2]
  39         if result == '':
  40             return text
  41         else:
  42             return result
  43     bibl_lng = etree.XPath('//dc:language//text()',
  44                            namespaces={'dc': str(DCNS)})(source_tree)
  45     short_lng = get_short_lng_code(bibl_lng[0])
  46     try:
  47         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  48                                        short_lng + '.dic'))
  49     except:
  50         pass
  51
  52
  53 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  54     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  55     for t in texts:
  56         parent = t.getparent()
  57         if hyph is not None:
  58             newt = ''
  59             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  60             for w in wlist:
  61                 newt += hyph.inserted(w, u'\u00AD')
  62         else:
  63             newt = t
  64         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  65         if t.is_text:
  66             parent.text = newt
  67         elif t.is_tail:
  68             parent.tail = newt
  69
  70
  71 def inner_xml(node):
  72     """ returns node's text and children as a string
  73
  74     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  75     x<b>y</b>z
  76     """
  77
  78     nt = node.text if node.text is not None else ''
  79     return ''.join([nt] + [etree.tostring(child) for child in node])
  80
  81
  82 def set_inner_xml(node, text):
  83     """ sets node's text and children from a string
  84
  85     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  86     >>> set_inner_xml(e, 'x<b>y</b>z')
  87     >>> print etree.tostring(e)
  88     <a>x<b>y</b>z</a>
  89     """
  90
  91     p = etree.fromstring('<x>%s</x>' % text)
  92     node.text = p.text
  93     node[:] = p[:]
  94
  95
  96 def node_name(node):
  97     """ Find out a node's name
  98
  99     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
 100     XYZ
 101     """
 102
 103     tempnode = deepcopy(node)
 104
 105     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 106         for e in tempnode.findall('.//%s' % p):
 107             t = e.tail
 108             e.clear()
 109             e.tail = t
 110     etree.strip_tags(tempnode, '*')
 111     return tempnode.text
 112
 113
 114 def xslt(xml, sheet):
 115     if isinstance(xml, etree._Element):
 116         xml = etree.ElementTree(xml)
 117     with open(sheet) as xsltf:
 118         return xml.xslt(etree.parse(xsltf))
 119
 120
 121 def replace_characters(node):
 122     def replace_chars(text):
 123         if text is None:
 124             return None
 125         return text.replace(u"\ufeff", u"")\
 126                    .replace("---", u"\u2014")\
 127                    .replace("--", u"\u2013")\
 128                    .replace(",,", u"\u201E")\
 129                    .replace('"', u"\u201D")\
 130                    .replace("'", u"\u2019")
 131     if node.tag in ('uwaga', 'extra'):
 132         t = node.tail
 133         node.clear()
 134         node.tail = t
 135     node.text = replace_chars(node.text)
 136     node.tail = replace_chars(node.tail)
 137     for child in node:
 138         replace_characters(child)
 139
 140
 141 def find_annotations(annotations, source, part_no):
 142     for child in source:
 143         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 144             annotation = deepcopy(child)
 145             number = str(len(annotations) + 1)
 146             annotation.set('number', number)
 147             annotation.set('part', str(part_no))
 148             annotation.tail = ''
 149             annotations.append(annotation)
 150             tail = child.tail
 151             child.clear()
 152             child.tail = tail
 153             child.text = number
 154         if child.tag not in ('extra', 'uwaga'):
 155             find_annotations(annotations, child, part_no)
 156
 157
 158 class Stanza(object):
 159     """
 160     Converts / verse endings into verse elements in a stanza.
 161
 162     Slashes may only occur directly in the stanza. Any slashes in subelements
 163     will be ignored, and the subelements will be put inside verse elements.
 164
 165     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 166     >>> Stanza(s).versify()
 167     >>> print etree.tostring(s)
 168     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 169     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 170
 171     """
 172     def __init__(self, stanza_elem):
 173         self.stanza = stanza_elem
 174         self.verses = []
 175         self.open_verse = None
 176
 177     def versify(self):
 178         self.push_text(self.stanza.text)
 179         for elem in self.stanza:
 180             self.push_elem(elem)
 181             self.push_text(elem.tail)
 182         tail = self.stanza.tail
 183         self.stanza.clear()
 184         self.stanza.tail = tail
 185         self.stanza.extend(self.verses)
 186
 187     def open_normal_verse(self):
 188         self.open_verse = self.stanza.makeelement("wers_normalny")
 189         self.verses.append(self.open_verse)
 190
 191     def get_open_verse(self):
 192         if self.open_verse is None:
 193             self.open_normal_verse()
 194         return self.open_verse
 195
 196     def push_text(self, text):
 197         if not text:
 198             return
 199         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 200             if i:
 201                 self.open_normal_verse()
 202             verse = self.get_open_verse()
 203             if len(verse):
 204                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 205             else:
 206                 verse.text = (verse.text or "") + verse_text
 207
 208     def push_elem(self, elem):
 209         if elem.tag.startswith("wers"):
 210             verse = deepcopy(elem)
 211             verse.tail = None
 212             self.verses.append(verse)
 213             self.open_verse = verse
 214         else:
 215             appended = deepcopy(elem)
 216             appended.tail = None
 217             self.get_open_verse().append(appended)
 218
 219
 220 def replace_by_verse(tree):
 221     """ Find stanzas and create new verses in place of a '/' character """
 222
 223     stanzas = tree.findall('.//' + WLNS('strofa'))
 224     for stanza in stanzas:
 225         Stanza(stanza).versify()
 226
 227
 228 def add_to_manifest(manifest, partno):
 229     """ Adds a node to the manifest section in content.opf file """
 230
 231     partstr = 'part%d' % partno
 232     e = manifest.makeelement(
 233         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 234                                'media-type': 'application/xhtml+xml'}
 235     )
 236     manifest.append(e)
 237
 238
 239 def add_to_spine(spine, partno):
 240     """ Adds a node to the spine section in content.opf file """
 241
 242     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 243     spine.append(e)
 244
 245
 246 class TOC(object):
 247     def __init__(self, name=None, part_href=None):
 248         self.children = []
 249         self.name = name
 250         self.part_href = part_href
 251         self.sub_number = None
 252
 253     def add(self, name, part_href, level=0, is_part=True, index=None):
 254         assert level == 0 or index is None
 255         if level > 0 and self.children:
 256             return self.children[-1].add(name, part_href, level - 1, is_part)
 257         else:
 258             t = TOC(name)
 259             t.part_href = part_href
 260             if index is not None:
 261                 self.children.insert(index, t)
 262             else:
 263                 self.children.append(t)
 264             if not is_part:
 265                 t.sub_number = len(self.children) + 1
 266                 return t.sub_number
 267
 268     def append(self, toc):
 269         self.children.append(toc)
 270
 271     def extend(self, toc):
 272         self.children.extend(toc.children)
 273
 274     def depth(self):
 275         if self.children:
 276             return max((c.depth() for c in self.children)) + 1
 277         else:
 278             return 0
 279
 280     def href(self):
 281         src = self.part_href
 282         if self.sub_number is not None:
 283             src += '#sub%d' % self.sub_number
 284         return src
 285
 286     def write_to_xml(self, nav_map, counter=1):
 287         for child in self.children:
 288             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 289             nav_point.set('id', 'NavPoint-%d' % counter)
 290             nav_point.set('playOrder', str(counter))
 291
 292             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 293             text = nav_map.makeelement(NCXNS('text'))
 294             if child.name is not None:
 295                 text.text = re.sub(r'\n', ' ', child.name)
 296             else:
 297                 text.text = child.name
 298             nav_label.append(text)
 299             nav_point.append(nav_label)
 300
 301             content = nav_map.makeelement(NCXNS('content'))
 302             content.set('src', child.href())
 303             nav_point.append(content)
 304             nav_map.append(nav_point)
 305             counter = child.write_to_xml(nav_point, counter + 1)
 306         return counter
 307
 308     def html_part(self, depth=0):
 309         texts = []
 310         for child in self.children:
 311             texts.append(
 312                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 313                 (depth, child.href(), child.name))
 314             texts.append(child.html_part(depth + 1))
 315         return "\n".join(texts)
 316
 317     def html(self):
 318         with open(get_resource('epub/toc.html')) as f:
 319             t = unicode(f.read(), 'utf-8')
 320         return t % self.html_part()
 321
 322
 323 def used_chars(element):
 324     """ Lists characters used in an ETree Element """
 325     chars = set((element.text or '') + (element.tail or ''))
 326     for child in element:
 327         chars = chars.union(used_chars(child))
 328     return chars
 329
 330
 331 def chop(main_text):
 332     """ divide main content of the XML file into chunks """
 333
 334     # prepare a container for each chunk
 335     part_xml = etree.Element('utwor')
 336     etree.SubElement(part_xml, 'master')
 337     main_xml_part = part_xml[0]  # master
 338
 339     last_node_part = False
 340
 341     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 342     is_scene = False
 343     is_act = False
 344     for one_part in main_text:
 345         name = one_part.tag
 346         if name == 'naglowek_scena':
 347             is_scene = True
 348         elif name == 'naglowek_akt':
 349             is_act = True
 350
 351     for one_part in main_text:
 352         name = one_part.tag
 353         if is_act is False and is_scene is True:
 354             if name == 'naglowek_czesc':
 355                 yield part_xml
 356                 last_node_part = True
 357                 main_xml_part[:] = [deepcopy(one_part)]
 358             elif not last_node_part and name == "naglowek_scena":
 359                 yield part_xml
 360                 main_xml_part[:] = [deepcopy(one_part)]
 361             else:
 362                 main_xml_part.append(deepcopy(one_part))
 363                 last_node_part = False
 364         else:
 365             if name == 'naglowek_czesc':
 366                 yield part_xml
 367                 last_node_part = True
 368                 main_xml_part[:] = [deepcopy(one_part)]
 369             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 370                 yield part_xml
 371                 main_xml_part[:] = [deepcopy(one_part)]
 372             else:
 373                 main_xml_part.append(deepcopy(one_part))
 374                 last_node_part = False
 375     yield part_xml
 376
 377
 378 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 379     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 380
 381     toc = TOC()
 382     for element in chunk_xml[0]:
 383         if element.tag == "naglowek_czesc":
 384             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 385         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 386             toc.add(node_name(element), "part%d.html" % chunk_no)
 387         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 388             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 389             element.set('sub', str(subnumber))
 390     if empty:
 391         if not _empty_html_static:
 392             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 393         chars = set()
 394         output_html = _empty_html_static[0]
 395     else:
 396         find_annotations(annotations, chunk_xml, chunk_no)
 397         replace_by_verse(chunk_xml)
 398         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 399         chars = used_chars(html_tree.getroot())
 400         output_html = etree.tostring(
 401             html_tree, pretty_print=True, xml_declaration=True,
 402             encoding="utf-8",
 403             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 404                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 405         )
 406     return output_html, toc, chars
 407
 408
 409 def transform(wldoc, verbose=False, style=None, html_toc=False,
 410               sample=None, cover=None, flags=None, hyphenate=False, ilustr_path=''):
 411     """ produces a EPUB file
 412
 413     sample=n: generate sample e-book (with at least n paragraphs)
 414     cover: a cover.Cover factory or True for default
 415     flags: less-advertising, without-fonts, working-copy
 416     """
 417
 418     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 419         """ processes one input file and proceeds to its children """
 420
 421         replace_characters(wldoc.edoc.getroot())
 422
 423         hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
 424         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 425
 426         # every input file will have a TOC entry,
 427         # pointing to starting chunk
 428         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 429         chars = set()
 430         if first:
 431             # write book title page
 432             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
 433             chars = used_chars(html_tree.getroot())
 434             zip.writestr(
 435                 'OPS/title.html',
 436                 etree.tostring(
 437                     html_tree, pretty_print=True, xml_declaration=True,
 438                     encoding="utf-8",
 439                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 440                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 441                 )
 442             )
 443             # add a title page TOC entry
 444             toc.add(u"Strona tytułowa", "title.html")
 445         elif wldoc.book_info.parts:
 446             # write title page for every parent
 447             if sample is not None and sample <= 0:
 448                 chars = set()
 449                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 450             else:
 451                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 452                 chars = used_chars(html_tree.getroot())
 453                 html_string = etree.tostring(
 454                     html_tree, pretty_print=True, xml_declaration=True,
 455                     encoding="utf-8",
 456                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 457                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 458                 )
 459             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
 460             add_to_manifest(manifest, chunk_counter)
 461             add_to_spine(spine, chunk_counter)
 462             chunk_counter += 1
 463
 464         if len(wldoc.edoc.getroot()) > 1:
 465             # rdf before style master
 466             main_text = wldoc.edoc.getroot()[1]
 467         else:
 468             # rdf in style master
 469             main_text = wldoc.edoc.getroot()[0]
 470             if main_text.tag == RDFNS('RDF'):
 471                 main_text = None
 472
 473         if main_text is not None:
 474             for chunk_xml in chop(main_text):
 475                 empty = False
 476                 if sample is not None:
 477                     if sample <= 0:
 478                         empty = True
 479                     else:
 480                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 481                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 482
 483                 toc.extend(chunk_toc)
 484                 chars = chars.union(chunk_chars)
 485                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
 486                 add_to_manifest(manifest, chunk_counter)
 487                 add_to_spine(spine, chunk_counter)
 488                 chunk_counter += 1
 489
 490         for child in wldoc.parts():
 491             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 492                 child, chunk_counter, first=False, sample=sample)
 493             toc.append(child_toc)
 494             chars = chars.union(chunk_chars)
 495
 496         return toc, chunk_counter, chars, sample
 497
 498     document = deepcopy(wldoc)
 499     del wldoc
 500
 501     if flags:
 502         for flag in flags:
 503             document.edoc.getroot().set(flag, 'yes')
 504
 505     document.clean_ed_note()
 506     document.clean_ed_note('abstrakt')
 507
 508     # add editors info
 509     editors = document.editors()
 510     if editors:
 511         document.edoc.getroot().set('editors', u', '.join(sorted(
 512             editor.readable() for editor in editors)))
 513     if document.book_info.funders:
 514         document.edoc.getroot().set('funders', u', '.join(
 515             document.book_info.funders))
 516     if document.book_info.thanks:
 517         document.edoc.getroot().set('thanks', document.book_info.thanks)
 518
 519     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 520     manifest = opf.find('.//' + OPFNS('manifest'))
 521     guide = opf.find('.//' + OPFNS('guide'))
 522     spine = opf.find('.//' + OPFNS('spine'))
 523
 524     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 525     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 526
 527     functions.reg_mathml_epub(zip)
 528
 529     for filename in os.listdir(ilustr_path):
 530         zip.write(os.path.join(ilustr_path, filename), os.path.join('OPS', filename))
 531
 532     # write static elements
 533     mime = zipfile.ZipInfo()
 534     mime.filename = 'mimetype'
 535     mime.compress_type = zipfile.ZIP_STORED
 536     mime.extra = ''
 537     zip.writestr(mime, 'application/epub+zip')
 538     zip.writestr(
 539         'META-INF/container.xml',
 540         '<?xml version="1.0" ?>'
 541         '<container version="1.0" '
 542         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 543         '<rootfiles><rootfile full-path="OPS/content.opf" '
 544         'media-type="application/oebps-package+xml" />'
 545         '</rootfiles></container>'
 546     )
 547     zip.write(get_resource('res/wl-logo-small.png'),
 548               os.path.join('OPS', 'logo_wolnelektury.png'))
 549     zip.write(get_resource('res/jedenprocent.png'),
 550               os.path.join('OPS', 'jedenprocent.png'))
 551     if not style:
 552         style = get_resource('epub/style.css')
 553     zip.write(style, os.path.join('OPS', 'style.css'))
 554
 555     if cover:
 556         if cover is True:
 557             cover = DefaultEbookCover
 558
 559         cover_file = StringIO()
 560         bound_cover = cover(document.book_info)
 561         bound_cover.save(cover_file)
 562         cover_name = 'cover.%s' % bound_cover.ext()
 563         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 564         del cover_file
 565
 566         cover_tree = etree.parse(get_resource('epub/cover.html'))
 567         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 568         zip.writestr('OPS/cover.html', etree.tostring(
 569             cover_tree, pretty_print=True, xml_declaration=True,
 570             encoding="utf-8",
 571             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 572                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 573         ))
 574
 575         if bound_cover.uses_dc_cover:
 576             if document.book_info.cover_by:
 577                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 578             if document.book_info.cover_source:
 579                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 580
 581         manifest.append(etree.fromstring(
 582             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 583         manifest.append(etree.fromstring(
 584             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 585         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 586         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 587         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 588
 589     annotations = etree.Element('annotations')
 590
 591     toc_file = etree.fromstring(
 592         '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 593         '"-//NISO//DTD ncx 2005-1//EN" '
 594         '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 595         '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 596         'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 597         '</navMap></ncx>'
 598     )
 599     nav_map = toc_file[-1]
 600
 601     if html_toc:
 602         manifest.append(etree.fromstring(
 603             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 604         spine.append(etree.fromstring(
 605             '<itemref idref="html_toc" />'))
 606         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 607
 608     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 609
 610     if len(toc.children) < 2:
 611         toc.add(u"Początek utworu", "part1.html")
 612
 613     # Last modifications in container files and EPUB creation
 614     if len(annotations) > 0:
 615         toc.add("Przypisy", "annotations.html")
 616         manifest.append(etree.fromstring(
 617             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 618         spine.append(etree.fromstring(
 619             '<itemref idref="annotations" />'))
 620         replace_by_verse(annotations)
 621         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 622         chars = chars.union(used_chars(html_tree.getroot()))
 623         zip.writestr('OPS/annotations.html', etree.tostring(
 624             html_tree, pretty_print=True, xml_declaration=True,
 625             encoding="utf-8",
 626             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 627                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 628         ))
 629
 630     toc.add("Wesprzyj Wolne Lektury", "support.html")
 631     manifest.append(etree.fromstring(
 632         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 633     spine.append(etree.fromstring(
 634         '<itemref idref="support" />'))
 635     html_string = open(get_resource('epub/support.html')).read()
 636     chars.update(used_chars(etree.fromstring(html_string)))
 637     zip.writestr('OPS/support.html', html_string)
 638
 639     toc.add("Strona redakcyjna", "last.html")
 640     manifest.append(etree.fromstring(
 641         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 642     spine.append(etree.fromstring(
 643         '<itemref idref="last" />'))
 644     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
 645     chars.update(used_chars(html_tree.getroot()))
 646     zip.writestr('OPS/last.html', etree.tostring(
 647         html_tree, pretty_print=True, xml_declaration=True,
 648         encoding="utf-8",
 649         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 650                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 651     ))
 652
 653     if not flags or 'without-fonts' not in flags:
 654         # strip fonts
 655         tmpdir = mkdtemp('-librarian-epub')
 656         try:
 657             cwd = os.getcwd()
 658         except OSError:
 659             cwd = None
 660
 661         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 662         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 663             optimizer_call = ['perl', 'subset.pl', '--chars',
 664                               ''.join(chars).encode('utf-8'),
 665                               get_resource('fonts/' + fname),
 666                               os.path.join(tmpdir, fname)]
 667             if verbose:
 668                 print "Running font-optimizer"
 669                 subprocess.check_call(optimizer_call)
 670             else:
 671                 subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 672             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 673             manifest.append(etree.fromstring(
 674                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 675         rmtree(tmpdir)
 676         if cwd is not None:
 677             os.chdir(cwd)
 678     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 679                  xml_declaration=True, encoding="utf-8"))
 680     title = document.book_info.title
 681     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 682     for st in attributes:
 683         meta = toc_file.makeelement(NCXNS('meta'))
 684         meta.set('name', st)
 685         meta.set('content', '0')
 686         toc_file[0].append(meta)
 687     toc_file[0][0].set('content', str(document.book_info.url))
 688     toc_file[0][1].set('content', str(toc.depth()))
 689     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 690
 691     # write TOC
 692     if html_toc:
 693         toc.add(u"Spis treści", "toc.html", index=1)
 694         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 695     toc.write_to_xml(nav_map)
 696     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 697                  xml_declaration=True, encoding="utf-8"))
 698     zip.close()
 699
 700     return OutputFile.from_filename(output_file.name)