librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from lxml import etree
  15 import zipfile
  16 from tempfile import mkdtemp, NamedTemporaryFile
  17 from shutil import rmtree
  18
  19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  20 from librarian.cover import DefaultEbookCover
  21
  22 from librarian import functions, get_resource
  23
  24 from librarian.hyphenator import Hyphenator
  25
# Import-time registration of helpers from librarian.functions.
# NOTE(review): presumably these register the person-name and 3-to-2 letter
# language-code extension functions used by the stylesheets -- confirm in
# librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  28
  29
  30 def set_hyph_language(source_tree):
  31     def get_short_lng_code(text):
  32         result = ''
  33         text = ''.join(text)
  34         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  35             for line in f:
  36                 list = line.strip().split('|')
  37                 if list[0] == text:
  38                     result = list[2]
  39         if result == '':
  40             return text
  41         else:
  42             return result
  43     bibl_lng = etree.XPath('//dc:language//text()',
  44                            namespaces={'dc': str(DCNS)})(source_tree)
  45     short_lng = get_short_lng_code(bibl_lng[0])
  46     try:
  47         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  48                                        short_lng + '.dic'))
  49     except:
  50         pass
  51
  52
  53 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  54     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  55     for t in texts:
  56         parent = t.getparent()
  57         if hyph is not None:
  58             newt = ''
  59             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  60             for w in wlist:
  61                 newt += hyph.inserted(w, u'\u00AD')
  62         else:
  63             newt = t
  64         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  65         if t.is_text:
  66             parent.text = newt
  67         elif t.is_tail:
  68             parent.tail = newt
  69
  70
  71 def inner_xml(node):
  72     """ returns node's text and children as a string
  73
  74     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  75     x<b>y</b>z
  76     """
  77
  78     nt = node.text if node.text is not None else ''
  79     return ''.join([nt] + [etree.tostring(child) for child in node])
  80
  81
  82 def set_inner_xml(node, text):
  83     """ sets node's text and children from a string
  84
  85     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  86     >>> set_inner_xml(e, 'x<b>y</b>z')
  87     >>> print etree.tostring(e)
  88     <a>x<b>y</b>z</a>
  89     """
  90
  91     p = etree.fromstring('<x>%s</x>' % text)
  92     node.text = p.text
  93     node[:] = p[:]
  94
  95
  96 def node_name(node):
  97     """ Find out a node's name
  98
  99     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
 100     XYZ
 101     """
 102
 103     tempnode = deepcopy(node)
 104
 105     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 106         for e in tempnode.findall('.//%s' % p):
 107             t = e.tail
 108             e.clear()
 109             e.tail = t
 110     etree.strip_tags(tempnode, '*')
 111     return tempnode.text
 112
 113
 114 def xslt(xml, sheet):
 115     if isinstance(xml, etree._Element):
 116         xml = etree.ElementTree(xml)
 117     with open(sheet) as xsltf:
 118         return xml.xslt(etree.parse(xsltf))
 119
 120
 121 def replace_characters(node):
 122     def replace_chars(text):
 123         if text is None:
 124             return None
 125         return text.replace(u"\ufeff", u"")\
 126                    .replace("---", u"\u2014")\
 127                    .replace("--", u"\u2013")\
 128                    .replace(",,", u"\u201E")\
 129                    .replace('"', u"\u201D")\
 130                    .replace("'", u"\u2019")
 131     if node.tag in ('uwaga', 'extra'):
 132         t = node.tail
 133         node.clear()
 134         node.tail = t
 135     node.text = replace_chars(node.text)
 136     node.tail = replace_chars(node.tail)
 137     for child in node:
 138         replace_characters(child)
 139
 140
 141 def find_annotations(annotations, source, part_no):
 142     for child in source:
 143         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 144             annotation = deepcopy(child)
 145             number = str(len(annotations) + 1)
 146             annotation.set('number', number)
 147             annotation.set('part', str(part_no))
 148             annotation.tail = ''
 149             annotations.append(annotation)
 150             tail = child.tail
 151             child.clear()
 152             child.tail = tail
 153             child.text = number
 154         if child.tag not in ('extra', 'uwaga'):
 155             find_annotations(annotations, child, part_no)
 156
 157
 158 class Stanza(object):
 159     """
 160     Converts / verse endings into verse elements in a stanza.
 161
 162     Slashes may only occur directly in the stanza. Any slashes in subelements
 163     will be ignored, and the subelements will be put inside verse elements.
 164
 165     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 166     >>> Stanza(s).versify()
 167     >>> print etree.tostring(s)
 168     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 169     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 170
 171     """
 172     def __init__(self, stanza_elem):
 173         self.stanza = stanza_elem
 174         self.verses = []
 175         self.open_verse = None
 176
 177     def versify(self):
 178         self.push_text(self.stanza.text)
 179         for elem in self.stanza:
 180             self.push_elem(elem)
 181             self.push_text(elem.tail)
 182         tail = self.stanza.tail
 183         self.stanza.clear()
 184         self.stanza.tail = tail
 185         self.stanza.extend(self.verses)
 186
 187     def open_normal_verse(self):
 188         self.open_verse = self.stanza.makeelement("wers_normalny")
 189         self.verses.append(self.open_verse)
 190
 191     def get_open_verse(self):
 192         if self.open_verse is None:
 193             self.open_normal_verse()
 194         return self.open_verse
 195
 196     def push_text(self, text):
 197         if not text:
 198             return
 199         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 200             if i:
 201                 self.open_normal_verse()
 202             verse = self.get_open_verse()
 203             if len(verse):
 204                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 205             else:
 206                 verse.text = (verse.text or "") + verse_text
 207
 208     def push_elem(self, elem):
 209         if elem.tag.startswith("wers"):
 210             verse = deepcopy(elem)
 211             verse.tail = None
 212             self.verses.append(verse)
 213             self.open_verse = verse
 214         else:
 215             appended = deepcopy(elem)
 216             appended.tail = None
 217             self.get_open_verse().append(appended)
 218
 219
 220 def replace_by_verse(tree):
 221     """ Find stanzas and create new verses in place of a '/' character """
 222
 223     stanzas = tree.findall('.//' + WLNS('strofa'))
 224     for stanza in stanzas:
 225         Stanza(stanza).versify()
 226
 227
 228 def add_to_manifest(manifest, partno):
 229     """ Adds a node to the manifest section in content.opf file """
 230
 231     partstr = 'part%d' % partno
 232     e = manifest.makeelement(
 233         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 234                                'media-type': 'application/xhtml+xml'}
 235     )
 236     manifest.append(e)
 237
 238
 239 def add_to_spine(spine, partno):
 240     """ Adds a node to the spine section in content.opf file """
 241
 242     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 243     spine.append(e)
 244
 245
 246 class TOC(object):
 247     def __init__(self, name=None, part_href=None):
 248         self.children = []
 249         self.name = name
 250         self.part_href = part_href
 251         self.sub_number = None
 252
 253     def add(self, name, part_href, level=0, is_part=True, index=None):
 254         assert level == 0 or index is None
 255         if level > 0 and self.children:
 256             return self.children[-1].add(name, part_href, level - 1, is_part)
 257         else:
 258             t = TOC(name)
 259             t.part_href = part_href
 260             if index is not None:
 261                 self.children.insert(index, t)
 262             else:
 263                 self.children.append(t)
 264             if not is_part:
 265                 t.sub_number = len(self.children) + 1
 266                 return t.sub_number
 267
 268     def append(self, toc):
 269         self.children.append(toc)
 270
 271     def extend(self, toc):
 272         self.children.extend(toc.children)
 273
 274     def depth(self):
 275         if self.children:
 276             return max((c.depth() for c in self.children)) + 1
 277         else:
 278             return 0
 279
 280     def href(self):
 281         src = self.part_href
 282         if self.sub_number is not None:
 283             src += '#sub%d' % self.sub_number
 284         return src
 285
 286     def write_to_xml(self, nav_map, counter=1):
 287         for child in self.children:
 288             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 289             nav_point.set('id', 'NavPoint-%d' % counter)
 290             nav_point.set('playOrder', str(counter))
 291
 292             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 293             text = nav_map.makeelement(NCXNS('text'))
 294             if child.name is not None:
 295                 text.text = re.sub(r'\n', ' ', child.name)
 296             else:
 297                 text.text = child.name
 298             nav_label.append(text)
 299             nav_point.append(nav_label)
 300
 301             content = nav_map.makeelement(NCXNS('content'))
 302             content.set('src', child.href())
 303             nav_point.append(content)
 304             nav_map.append(nav_point)
 305             counter = child.write_to_xml(nav_point, counter + 1)
 306         return counter
 307
 308     def html_part(self, depth=0):
 309         texts = []
 310         for child in self.children:
 311             texts.append(
 312                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 313                 (depth, child.href(), child.name))
 314             texts.append(child.html_part(depth + 1))
 315         return "\n".join(texts)
 316
 317     def html(self):
 318         with open(get_resource('epub/toc.html')) as f:
 319             t = unicode(f.read(), 'utf-8')
 320         return t % self.html_part()
 321
 322
 323 def used_chars(element):
 324     """ Lists characters used in an ETree Element """
 325     chars = set((element.text or '') + (element.tail or ''))
 326     for child in element:
 327         chars = chars.union(used_chars(child))
 328     return chars
 329
 330
 331 def chop(main_text):
 332     """ divide main content of the XML file into chunks """
 333
 334     # prepare a container for each chunk
 335     part_xml = etree.Element('utwor')
 336     etree.SubElement(part_xml, 'master')
 337     main_xml_part = part_xml[0]  # master
 338
 339     last_node_part = False
 340
 341     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 342     is_scene = False
 343     is_act = False
 344     for one_part in main_text:
 345         name = one_part.tag
 346         if name == 'naglowek_scena':
 347             is_scene = True
 348         elif name == 'naglowek_akt':
 349             is_act = True
 350
 351     for one_part in main_text:
 352         name = one_part.tag
 353         if is_act is False and is_scene is True:
 354             if name == 'naglowek_czesc':
 355                 yield part_xml
 356                 last_node_part = True
 357                 main_xml_part[:] = [deepcopy(one_part)]
 358             elif not last_node_part and name == "naglowek_scena":
 359                 yield part_xml
 360                 main_xml_part[:] = [deepcopy(one_part)]
 361             else:
 362                 main_xml_part.append(deepcopy(one_part))
 363                 last_node_part = False
 364         else:
 365             if name == 'naglowek_czesc':
 366                 yield part_xml
 367                 last_node_part = True
 368                 main_xml_part[:] = [deepcopy(one_part)]
 369             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 370                 yield part_xml
 371                 main_xml_part[:] = [deepcopy(one_part)]
 372             else:
 373                 main_xml_part.append(deepcopy(one_part))
 374                 last_node_part = False
 375     yield part_xml
 376
 377
 378 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 379     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 380
 381     toc = TOC()
 382     for element in chunk_xml[0]:
 383         if element.tag == "naglowek_czesc":
 384             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 385         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 386             toc.add(node_name(element), "part%d.html" % chunk_no)
 387         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 388             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 389             element.set('sub', str(subnumber))
 390     if empty:
 391         if not _empty_html_static:
 392             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 393         chars = set()
 394         output_html = _empty_html_static[0]
 395     else:
 396         find_annotations(annotations, chunk_xml, chunk_no)
 397         replace_by_verse(chunk_xml)
 398         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 399         chars = used_chars(html_tree.getroot())
 400         output_html = etree.tostring(
 401             html_tree, pretty_print=True, xml_declaration=True,
 402             encoding="utf-8",
 403             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 404                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 405         )
 406     return output_html, toc, chars
 407
 408
 409 def transform(wldoc, verbose=False,
 410               style=None, html_toc=False,
 411               sample=None, cover=None, flags=None, hyphenate=False):
 412     """ produces a EPUB file
 413
 414     sample=n: generate sample e-book (with at least n paragraphs)
 415     cover: a cover.Cover factory or True for default
 416     flags: less-advertising, without-fonts, working-copy
 417     """
 418
 419     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 420         """ processes one input file and proceeds to its children """
 421
 422         replace_characters(wldoc.edoc.getroot())
 423
 424         hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
 425         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 426
 427         # every input file will have a TOC entry,
 428         # pointing to starting chunk
 429         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 430         chars = set()
 431         if first:
 432             # write book title page
 433             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
 434             chars = used_chars(html_tree.getroot())
 435             zip.writestr(
 436                 'OPS/title.html',
 437                 etree.tostring(
 438                     html_tree, pretty_print=True, xml_declaration=True,
 439                     encoding="utf-8",
 440                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 441                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 442                 )
 443             )
 444             # add a title page TOC entry
 445             toc.add(u"Strona tytułowa", "title.html")
 446         elif wldoc.book_info.parts:
 447             # write title page for every parent
 448             if sample is not None and sample <= 0:
 449                 chars = set()
 450                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 451             else:
 452                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 453                 chars = used_chars(html_tree.getroot())
 454                 html_string = etree.tostring(
 455                     html_tree, pretty_print=True, xml_declaration=True,
 456                     encoding="utf-8",
 457                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 458                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 459                 )
 460             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
 461             add_to_manifest(manifest, chunk_counter)
 462             add_to_spine(spine, chunk_counter)
 463             chunk_counter += 1
 464
 465         if len(wldoc.edoc.getroot()) > 1:
 466             # rdf before style master
 467             main_text = wldoc.edoc.getroot()[1]
 468         else:
 469             # rdf in style master
 470             main_text = wldoc.edoc.getroot()[0]
 471             if main_text.tag == RDFNS('RDF'):
 472                 main_text = None
 473
 474         if main_text is not None:
 475             for chunk_xml in chop(main_text):
 476                 empty = False
 477                 if sample is not None:
 478                     if sample <= 0:
 479                         empty = True
 480                     else:
 481                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 482                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 483
 484                 toc.extend(chunk_toc)
 485                 chars = chars.union(chunk_chars)
 486                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
 487                 add_to_manifest(manifest, chunk_counter)
 488                 add_to_spine(spine, chunk_counter)
 489                 chunk_counter += 1
 490
 491         for child in wldoc.parts():
 492             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 493                 child, chunk_counter, first=False, sample=sample)
 494             toc.append(child_toc)
 495             chars = chars.union(chunk_chars)
 496
 497         return toc, chunk_counter, chars, sample
 498
 499     document = deepcopy(wldoc)
 500     del wldoc
 501
 502     if flags:
 503         for flag in flags:
 504             document.edoc.getroot().set(flag, 'yes')
 505
 506     document.clean_ed_note()
 507     document.clean_ed_note('abstrakt')
 508
 509     # add editors info
 510     editors = document.editors()
 511     if editors:
 512         document.edoc.getroot().set('editors', u', '.join(sorted(
 513             editor.readable() for editor in editors)))
 514     if document.book_info.funders:
 515         document.edoc.getroot().set('funders', u', '.join(
 516             document.book_info.funders))
 517     if document.book_info.thanks:
 518         document.edoc.getroot().set('thanks', document.book_info.thanks)
 519
 520     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 521     manifest = opf.find('.//' + OPFNS('manifest'))
 522     guide = opf.find('.//' + OPFNS('guide'))
 523     spine = opf.find('.//' + OPFNS('spine'))
 524
 525     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 526     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 527
 528     functions.reg_mathml_epub(zip)
 529
 530     # write static elements
 531     mime = zipfile.ZipInfo()
 532     mime.filename = 'mimetype'
 533     mime.compress_type = zipfile.ZIP_STORED
 534     mime.extra = ''
 535     zip.writestr(mime, 'application/epub+zip')
 536     zip.writestr(
 537         'META-INF/container.xml',
 538         '<?xml version="1.0" ?>'
 539         '<container version="1.0" '
 540         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 541         '<rootfiles><rootfile full-path="OPS/content.opf" '
 542         'media-type="application/oebps-package+xml" />'
 543         '</rootfiles></container>'
 544     )
 545     zip.write(get_resource('res/wl-logo-small.png'),
 546               os.path.join('OPS', 'logo_wolnelektury.png'))
 547     zip.write(get_resource('res/jedenprocent.png'),
 548               os.path.join('OPS', 'jedenprocent.png'))
 549     if not style:
 550         style = get_resource('epub/style.css')
 551     zip.write(style, os.path.join('OPS', 'style.css'))
 552
 553     if cover:
 554         if cover is True:
 555             cover = DefaultEbookCover
 556
 557         cover_file = StringIO()
 558         bound_cover = cover(document.book_info)
 559         bound_cover.save(cover_file)
 560         cover_name = 'cover.%s' % bound_cover.ext()
 561         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 562         del cover_file
 563
 564         cover_tree = etree.parse(get_resource('epub/cover.html'))
 565         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 566         zip.writestr('OPS/cover.html', etree.tostring(
 567             cover_tree, pretty_print=True, xml_declaration=True,
 568             encoding="utf-8",
 569             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 570                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 571         ))
 572
 573         if bound_cover.uses_dc_cover:
 574             if document.book_info.cover_by:
 575                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 576             if document.book_info.cover_source:
 577                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 578
 579         manifest.append(etree.fromstring(
 580             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 581         manifest.append(etree.fromstring(
 582             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 583         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 584         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 585         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 586
 587     annotations = etree.Element('annotations')
 588
 589     toc_file = etree.fromstring(
 590         '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 591         '"-//NISO//DTD ncx 2005-1//EN" '
 592         '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 593         '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 594         'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 595         '</navMap></ncx>'
 596     )
 597     nav_map = toc_file[-1]
 598
 599     if html_toc:
 600         manifest.append(etree.fromstring(
 601             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 602         spine.append(etree.fromstring(
 603             '<itemref idref="html_toc" />'))
 604         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 605
 606     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 607
 608     if len(toc.children) < 2:
 609         toc.add(u"Początek utworu", "part1.html")
 610
 611     # Last modifications in container files and EPUB creation
 612     if len(annotations) > 0:
 613         toc.add("Przypisy", "annotations.html")
 614         manifest.append(etree.fromstring(
 615             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 616         spine.append(etree.fromstring(
 617             '<itemref idref="annotations" />'))
 618         replace_by_verse(annotations)
 619         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 620         chars = chars.union(used_chars(html_tree.getroot()))
 621         zip.writestr('OPS/annotations.html', etree.tostring(
 622             html_tree, pretty_print=True, xml_declaration=True,
 623             encoding="utf-8",
 624             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 625                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 626         ))
 627
 628     toc.add("Wesprzyj Wolne Lektury", "support.html")
 629     manifest.append(etree.fromstring(
 630         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 631     spine.append(etree.fromstring(
 632         '<itemref idref="support" />'))
 633     html_string = open(get_resource('epub/support.html')).read()
 634     chars.update(used_chars(etree.fromstring(html_string)))
 635     zip.writestr('OPS/support.html', html_string)
 636
 637     toc.add("Strona redakcyjna", "last.html")
 638     manifest.append(etree.fromstring(
 639         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 640     spine.append(etree.fromstring(
 641         '<itemref idref="last" />'))
 642     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
 643     chars.update(used_chars(html_tree.getroot()))
 644     zip.writestr('OPS/last.html', etree.tostring(
 645         html_tree, pretty_print=True, xml_declaration=True,
 646         encoding="utf-8",
 647         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 648                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 649     ))
 650
 651     if not flags or 'without-fonts' not in flags:
 652         # strip fonts
 653         tmpdir = mkdtemp('-librarian-epub')
 654         try:
 655             cwd = os.getcwd()
 656         except OSError:
 657             cwd = None
 658
 659         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 660         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 661             optimizer_call = ['perl', 'subset.pl', '--chars',
 662                               ''.join(chars).encode('utf-8'),
 663                               get_resource('fonts/' + fname),
 664                               os.path.join(tmpdir, fname)]
 665             if verbose:
 666                 print "Running font-optimizer"
 667                 subprocess.check_call(optimizer_call)
 668             else:
 669                 subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 670             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 671             manifest.append(etree.fromstring(
 672                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 673         rmtree(tmpdir)
 674         if cwd is not None:
 675             os.chdir(cwd)
 676     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 677                  xml_declaration=True, encoding="utf-8"))
 678     title = document.book_info.title
 679     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 680     for st in attributes:
 681         meta = toc_file.makeelement(NCXNS('meta'))
 682         meta.set('name', st)
 683         meta.set('content', '0')
 684         toc_file[0].append(meta)
 685     toc_file[0][0].set('content', str(document.book_info.url))
 686     toc_file[0][1].set('content', str(toc.depth()))
 687     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 688
 689     # write TOC
 690     if html_toc:
 691         toc.add(u"Spis treści", "toc.html", index=1)
 692         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 693     toc.write_to_xml(nav_map)
 694     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 695                  xml_declaration=True, encoding="utf-8"))
 696     zip.close()
 697
 698     return OutputFile.from_filename(output_file.name)