librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from mimetypes import guess_type
  15
  16 from lxml import etree
  17 import zipfile
  18 from tempfile import mkdtemp, NamedTemporaryFile
  19 from shutil import rmtree
  20
  21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  22 from librarian.cover import make_cover
  23
  24 from librarian import functions, get_resource
  25
  26 from librarian.hyphenator import Hyphenator
  27
# Register Librarian's helper functions once, at import time, so they are in
# place before any stylesheet in this module is run.
# NOTE(review): presumably these become lxml XPath/XSLT extension functions
# (person-name formatting and 3-to-2 letter language codes) -- confirm in
# librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
  30
  31
  32 def squeeze_whitespace(s):
  33     return re.sub(r'\s+', ' ', s)
  34
  35
  36 def set_hyph_language(source_tree):
  37     def get_short_lng_code(text):
  38         result = ''
  39         text = ''.join(text)
  40         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  41             for line in f:
  42                 list = line.strip().split('|')
  43                 if list[0] == text:
  44                     result = list[2]
  45         if result == '':
  46             return text
  47         else:
  48             return result
  49     bibl_lng = etree.XPath('//dc:language//text()',
  50                            namespaces={'dc': str(DCNS)})(source_tree)
  51     short_lng = get_short_lng_code(bibl_lng[0])
  52     try:
  53         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  54                                        short_lng + '.dic'))
  55     except:
  56         pass
  57
  58
  59 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  60     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  61     for t in texts:
  62         parent = t.getparent()
  63         if hyph is not None:
  64             newt = ''
  65             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  66             for w in wlist:
  67                 newt += hyph.inserted(w, u'\u00AD')
  68         else:
  69             newt = t
  70         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  71         if t.is_text:
  72             parent.text = newt
  73         elif t.is_tail:
  74             parent.tail = newt
  75
  76
  77 def inner_xml(node):
  78     """ returns node's text and children as a string
  79
  80     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  81     x<b>y</b>z
  82     """
  83
  84     nt = node.text if node.text is not None else ''
  85     return ''.join([nt] + [etree.tostring(child) for child in node])
  86
  87
  88 def set_inner_xml(node, text):
  89     """ sets node's text and children from a string
  90
  91     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  92     >>> set_inner_xml(e, 'x<b>y</b>z')
  93     >>> print etree.tostring(e)
  94     <a>x<b>y</b>z</a>
  95     """
  96
  97     p = etree.fromstring('<x>%s</x>' % text)
  98     node.text = p.text
  99     node[:] = p[:]
 100
 101
 102 def node_name(node):
 103     """ Find out a node's name
 104
 105     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
 106     XYZ
 107     """
 108
 109     tempnode = deepcopy(node)
 110
 111     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 112         for e in tempnode.findall('.//%s' % p):
 113             t = e.tail
 114             e.clear()
 115             e.tail = t
 116     etree.strip_tags(tempnode, '*')
 117     return tempnode.text
 118
 119
 120 def xslt(xml, sheet, **kwargs):
 121     if isinstance(xml, etree._Element):
 122         xml = etree.ElementTree(xml)
 123     with open(sheet) as xsltf:
 124         transform = etree.XSLT(etree.parse(xsltf))
 125         params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
 126         return transform(xml, **params)
 127
 128
 129 def replace_characters(node):
 130     def replace_chars(text):
 131         if text is None:
 132             return None
 133         return text.replace(u"\ufeff", u"")\
 134                    .replace("---", u"\u2014")\
 135                    .replace("--", u"\u2013")\
 136                    .replace(",,", u"\u201E")\
 137                    .replace('"', u"\u201D")\
 138                    .replace("'", u"\u2019")
 139     if node.tag in ('uwaga', 'extra'):
 140         t = node.tail
 141         node.clear()
 142         node.tail = t
 143     node.text = replace_chars(node.text)
 144     node.tail = replace_chars(node.tail)
 145     for child in node:
 146         replace_characters(child)
 147
 148
 149 def find_annotations(annotations, source, part_no):
 150     for child in source:
 151         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 152             annotation = deepcopy(child)
 153             number = str(len(annotations) + 1)
 154             annotation.set('number', number)
 155             annotation.set('part', str(part_no))
 156             annotation.tail = ''
 157             annotations.append(annotation)
 158             tail = child.tail
 159             child.clear()
 160             child.tail = tail
 161             child.text = number
 162         if child.tag not in ('extra', 'uwaga'):
 163             find_annotations(annotations, child, part_no)
 164
 165
 166 class Stanza(object):
 167     """
 168     Converts / verse endings into verse elements in a stanza.
 169
 170     Slashes may only occur directly in the stanza. Any slashes in subelements
 171     will be ignored, and the subelements will be put inside verse elements.
 172
 173     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 174     >>> Stanza(s).versify()
 175     >>> print etree.tostring(s)
 176     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 177     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 178
 179     """
 180     def __init__(self, stanza_elem):
 181         self.stanza = stanza_elem
 182         self.verses = []
 183         self.open_verse = None
 184
 185     def versify(self):
 186         self.push_text(self.stanza.text)
 187         for elem in self.stanza:
 188             self.push_elem(elem)
 189             self.push_text(elem.tail)
 190         tail = self.stanza.tail
 191         self.stanza.clear()
 192         self.stanza.tail = tail
 193         self.stanza.extend(self.verses)
 194
 195     def open_normal_verse(self):
 196         self.open_verse = self.stanza.makeelement("wers_normalny")
 197         self.verses.append(self.open_verse)
 198
 199     def get_open_verse(self):
 200         if self.open_verse is None:
 201             self.open_normal_verse()
 202         return self.open_verse
 203
 204     def push_text(self, text):
 205         if not text:
 206             return
 207         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 208             if i:
 209                 self.open_normal_verse()
 210             if not verse_text.strip():
 211                 continue
 212             verse = self.get_open_verse()
 213             if len(verse):
 214                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 215             else:
 216                 verse.text = (verse.text or "") + verse_text
 217
 218     def push_elem(self, elem):
 219         if elem.tag.startswith("wers"):
 220             verse = deepcopy(elem)
 221             verse.tail = None
 222             self.verses.append(verse)
 223             self.open_verse = verse
 224         else:
 225             appended = deepcopy(elem)
 226             appended.tail = None
 227             self.get_open_verse().append(appended)
 228
 229
 230 def replace_by_verse(tree):
 231     """ Find stanzas and create new verses in place of a '/' character """
 232
 233     stanzas = tree.findall('.//' + WLNS('strofa'))
 234     for stanza in stanzas:
 235         Stanza(stanza).versify()
 236
 237
 238 def add_to_manifest(manifest, partno):
 239     """ Adds a node to the manifest section in content.opf file """
 240
 241     partstr = 'part%d' % partno
 242     e = manifest.makeelement(
 243         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 244                                'media-type': 'application/xhtml+xml'}
 245     )
 246     manifest.append(e)
 247
 248
 249 def add_to_spine(spine, partno):
 250     """ Adds a node to the spine section in content.opf file """
 251
 252     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 253     spine.append(e)
 254
 255
 256 class TOC(object):
 257     def __init__(self, name=None, part_href=None):
 258         self.children = []
 259         self.name = name
 260         self.part_href = part_href
 261         self.sub_number = None
 262
 263     def add(self, name, part_href, level=0, is_part=True, index=None):
 264         assert level == 0 or index is None
 265         if level > 0 and self.children:
 266             return self.children[-1].add(name, part_href, level - 1, is_part)
 267         else:
 268             t = TOC(name)
 269             t.part_href = part_href
 270             if index is not None:
 271                 self.children.insert(index, t)
 272             else:
 273                 self.children.append(t)
 274             if not is_part:
 275                 t.sub_number = len(self.children) + 1
 276                 return t.sub_number
 277
 278     def append(self, toc):
 279         self.children.append(toc)
 280
 281     def extend(self, toc):
 282         self.children.extend(toc.children)
 283
 284     def depth(self):
 285         if self.children:
 286             return max((c.depth() for c in self.children)) + 1
 287         else:
 288             return 0
 289
 290     def href(self):
 291         src = self.part_href
 292         if self.sub_number is not None:
 293             src += '#sub%d' % self.sub_number
 294         return src
 295
 296     def write_to_xml(self, nav_map, counter=1):
 297         for child in self.children:
 298             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 299             nav_point.set('id', 'NavPoint-%d' % counter)
 300             nav_point.set('playOrder', str(counter))
 301
 302             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 303             text = nav_map.makeelement(NCXNS('text'))
 304             if child.name is not None:
 305                 text.text = re.sub(r'\n', ' ', child.name)
 306             else:
 307                 text.text = child.name
 308             nav_label.append(text)
 309             nav_point.append(nav_label)
 310
 311             content = nav_map.makeelement(NCXNS('content'))
 312             content.set('src', child.href())
 313             nav_point.append(content)
 314             nav_map.append(nav_point)
 315             counter = child.write_to_xml(nav_point, counter + 1)
 316         return counter
 317
 318     def html_part(self, depth=0):
 319         texts = []
 320         for child in self.children:
 321             texts.append(
 322                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 323                 (depth, child.href(), child.name))
 324             texts.append(child.html_part(depth + 1))
 325         return "\n".join(texts)
 326
 327     def html(self):
 328         with open(get_resource('epub/toc.html')) as f:
 329             t = unicode(f.read(), 'utf-8')
 330         return t % self.html_part()
 331
 332
 333 def used_chars(element):
 334     """ Lists characters used in an ETree Element """
 335     chars = set((element.text or '') + (element.tail or ''))
 336     for child in element:
 337         chars = chars.union(used_chars(child))
 338     return chars
 339
 340
 341 def chop(main_text):
 342     """ divide main content of the XML file into chunks """
 343
 344     # prepare a container for each chunk
 345     part_xml = etree.Element('utwor')
 346     etree.SubElement(part_xml, 'master')
 347     main_xml_part = part_xml[0]  # master
 348
 349     last_node_part = False
 350
 351     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 352     is_scene = False
 353     is_act = False
 354     for one_part in main_text:
 355         name = one_part.tag
 356         if name == 'naglowek_scena':
 357             is_scene = True
 358         elif name == 'naglowek_akt':
 359             is_act = True
 360
 361     for one_part in main_text:
 362         name = one_part.tag
 363         if is_act is False and is_scene is True:
 364             if name == 'naglowek_czesc':
 365                 yield part_xml
 366                 last_node_part = True
 367                 main_xml_part[:] = [deepcopy(one_part)]
 368             elif not last_node_part and name == "naglowek_scena":
 369                 yield part_xml
 370                 main_xml_part[:] = [deepcopy(one_part)]
 371             else:
 372                 main_xml_part.append(deepcopy(one_part))
 373                 last_node_part = False
 374         else:
 375             if name == 'naglowek_czesc':
 376                 yield part_xml
 377                 last_node_part = True
 378                 main_xml_part[:] = [deepcopy(one_part)]
 379             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 380                 yield part_xml
 381                 main_xml_part[:] = [deepcopy(one_part)]
 382             else:
 383                 main_xml_part.append(deepcopy(one_part))
 384                 last_node_part = False
 385     yield part_xml
 386
 387
 388 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 389     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 390
 391     toc = TOC()
 392     for element in chunk_xml[0]:
 393         if element.tag == "naglowek_czesc":
 394             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 395         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 396             toc.add(node_name(element), "part%d.html" % chunk_no)
 397         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 398             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 399             element.set('sub', str(subnumber))
 400     if empty:
 401         if not _empty_html_static:
 402             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 403         chars = set()
 404         output_html = _empty_html_static[0]
 405     else:
 406         find_annotations(annotations, chunk_xml, chunk_no)
 407         replace_by_verse(chunk_xml)
 408         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 409         chars = used_chars(html_tree.getroot())
 410         output_html = etree.tostring(
 411             html_tree, pretty_print=True, xml_declaration=True,
 412             encoding="utf-8",
 413             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 414                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 415         )
 416     return output_html, toc, chars
 417
 418
 419 def transform(wldoc, verbose=False, style=None, html_toc=False,
 420               sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
 421     """ produces a EPUB file
 422
 423     sample=n: generate sample e-book (with at least n paragraphs)
 424     cover: a cover.Cover factory or True for default
 425     flags: less-advertising, without-fonts, working-copy
 426     """
 427
 428     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 429         """ processes one input file and proceeds to its children """
 430
 431         replace_characters(wldoc.edoc.getroot())
 432
 433         hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
 434         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 435
 436         # every input file will have a TOC entry,
 437         # pointing to starting chunk
 438         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 439         chars = set()
 440         if first:
 441             # write book title page
 442             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
 443             chars = used_chars(html_tree.getroot())
 444             html_string = etree.tostring(
 445                 html_tree, pretty_print=True, xml_declaration=True,
 446                 encoding="utf-8",
 447                 doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 448                         ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 449             )
 450             zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
 451             # add a title page TOC entry
 452             toc.add(u"Strona tytułowa", "title.html")
 453         elif wldoc.book_info.parts:
 454             # write title page for every parent
 455             if sample is not None and sample <= 0:
 456                 chars = set()
 457                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 458             else:
 459                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 460                 chars = used_chars(html_tree.getroot())
 461                 html_string = etree.tostring(
 462                     html_tree, pretty_print=True, xml_declaration=True,
 463                     encoding="utf-8",
 464                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 465                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 466                 )
 467             zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
 468             add_to_manifest(manifest, chunk_counter)
 469             add_to_spine(spine, chunk_counter)
 470             chunk_counter += 1
 471
 472         if len(wldoc.edoc.getroot()) > 1:
 473             # rdf before style master
 474             main_text = wldoc.edoc.getroot()[1]
 475         else:
 476             # rdf in style master
 477             main_text = wldoc.edoc.getroot()[0]
 478             if main_text.tag == RDFNS('RDF'):
 479                 main_text = None
 480
 481         if main_text is not None:
 482             for chunk_xml in chop(main_text):
 483                 empty = False
 484                 if sample is not None:
 485                     if sample <= 0:
 486                         empty = True
 487                     else:
 488                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 489                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 490
 491                 toc.extend(chunk_toc)
 492                 chars = chars.union(chunk_chars)
 493                 zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
 494                 add_to_manifest(manifest, chunk_counter)
 495                 add_to_spine(spine, chunk_counter)
 496                 chunk_counter += 1
 497
 498         for child in wldoc.parts():
 499             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 500                 child, chunk_counter, first=False, sample=sample)
 501             toc.append(child_toc)
 502             chars = chars.union(chunk_chars)
 503
 504         return toc, chunk_counter, chars, sample
 505
 506     document = deepcopy(wldoc)
 507     del wldoc
 508
 509     if flags:
 510         for flag in flags:
 511             document.edoc.getroot().set(flag, 'yes')
 512
 513     document.clean_ed_note()
 514     document.clean_ed_note('abstrakt')
 515
 516     # add editors info
 517     editors = document.editors()
 518     if editors:
 519         document.edoc.getroot().set('editors', u', '.join(sorted(
 520             editor.readable() for editor in editors)))
 521     if document.book_info.funders:
 522         document.edoc.getroot().set('funders', u', '.join(
 523             document.book_info.funders))
 524     if document.book_info.thanks:
 525         document.edoc.getroot().set('thanks', document.book_info.thanks)
 526
 527     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 528     manifest = opf.find('.//' + OPFNS('manifest'))
 529     guide = opf.find('.//' + OPFNS('guide'))
 530     spine = opf.find('.//' + OPFNS('spine'))
 531
 532     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 533     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 534
 535     functions.reg_mathml_epub(zip)
 536
 537     if os.path.isdir(ilustr_path):
 538         for i, filename in enumerate(os.listdir(ilustr_path)):
 539             file_path = os.path.join(ilustr_path, filename)
 540             zip.write(file_path, os.path.join('OPS', filename))
 541             image_id = 'image%s' % i
 542             manifest.append(etree.fromstring(
 543                 '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
 544
 545     # write static elements
 546     mime = zipfile.ZipInfo()
 547     mime.filename = 'mimetype'
 548     mime.compress_type = zipfile.ZIP_STORED
 549     mime.extra = ''
 550     zip.writestr(mime, 'application/epub+zip')
 551     zip.writestr(
 552         'META-INF/container.xml',
 553         '<?xml version="1.0" ?>'
 554         '<container version="1.0" '
 555         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 556         '<rootfiles><rootfile full-path="OPS/content.opf" '
 557         'media-type="application/oebps-package+xml" />'
 558         '</rootfiles></container>'
 559     )
 560     zip.write(get_resource('res/wl-logo-small.png'),
 561               os.path.join('OPS', 'logo_wolnelektury.png'))
 562     zip.write(get_resource('res/jedenprocent.png'),
 563               os.path.join('OPS', 'jedenprocent.png'))
 564     if not style:
 565         style = get_resource('epub/style.css')
 566     zip.write(style, os.path.join('OPS', 'style.css'))
 567
 568     if cover:
 569         if cover is True:
 570             cover = make_cover
 571
 572         cover_file = StringIO()
 573         bound_cover = cover(document.book_info)
 574         bound_cover.save(cover_file)
 575         cover_name = 'cover.%s' % bound_cover.ext()
 576         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 577         del cover_file
 578
 579         cover_tree = etree.parse(get_resource('epub/cover.html'))
 580         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 581         zip.writestr('OPS/cover.html', etree.tostring(
 582             cover_tree, pretty_print=True, xml_declaration=True,
 583             encoding="utf-8",
 584             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 585                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 586         ))
 587
 588         if bound_cover.uses_dc_cover:
 589             if document.book_info.cover_by:
 590                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 591             if document.book_info.cover_source:
 592                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 593
 594         manifest.append(etree.fromstring(
 595             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 596         manifest.append(etree.fromstring(
 597             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 598         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 599         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 600         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 601
 602     annotations = etree.Element('annotations')
 603
 604     toc_file = etree.fromstring(
 605         '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 606         '"-//NISO//DTD ncx 2005-1//EN" '
 607         '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 608         '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 609         'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 610         '</navMap></ncx>'
 611     )
 612     nav_map = toc_file[-1]
 613
 614     if html_toc:
 615         manifest.append(etree.fromstring(
 616             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 617         spine.append(etree.fromstring(
 618             '<itemref idref="html_toc" />'))
 619         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 620
 621     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 622
 623     if len(toc.children) < 2:
 624         toc.add(u"Początek utworu", "part1.html")
 625
 626     # Last modifications in container files and EPUB creation
 627     if len(annotations) > 0:
 628         toc.add("Przypisy", "annotations.html")
 629         manifest.append(etree.fromstring(
 630             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 631         spine.append(etree.fromstring(
 632             '<itemref idref="annotations" />'))
 633         replace_by_verse(annotations)
 634         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 635         chars = chars.union(used_chars(html_tree.getroot()))
 636         zip.writestr('OPS/annotations.html', etree.tostring(
 637             html_tree, pretty_print=True, xml_declaration=True,
 638             encoding="utf-8",
 639             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 640                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 641         ))
 642
 643     toc.add("Wesprzyj Wolne Lektury", "support.html")
 644     manifest.append(etree.fromstring(
 645         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 646     spine.append(etree.fromstring(
 647         '<itemref idref="support" />'))
 648     html_string = open(get_resource('epub/support.html')).read()
 649     chars.update(used_chars(etree.fromstring(html_string)))
 650     zip.writestr('OPS/support.html', squeeze_whitespace(html_string))
 651
 652     toc.add("Strona redakcyjna", "last.html")
 653     manifest.append(etree.fromstring(
 654         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 655     spine.append(etree.fromstring(
 656         '<itemref idref="last" />'))
 657     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
 658     chars.update(used_chars(html_tree.getroot()))
 659     zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
 660         html_tree, pretty_print=True, xml_declaration=True,
 661         encoding="utf-8",
 662         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 663                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 664     )))
 665
 666     if not flags or 'without-fonts' not in flags:
 667         # strip fonts
 668         tmpdir = mkdtemp('-librarian-epub')
 669         try:
 670             cwd = os.getcwd()
 671         except OSError:
 672             cwd = None
 673
 674         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 675         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 676             optimizer_call = ['perl', 'subset.pl', '--chars',
 677                               ''.join(chars).encode('utf-8'),
 678                               get_resource('fonts/' + fname),
 679                               os.path.join(tmpdir, fname)]
 680             env = {"PERL_USE_UNSAFE_INC": "1"}
 681             if verbose:
 682                 print "Running font-optimizer"
 683                 subprocess.check_call(optimizer_call, env=env)
 684             else:
 685                 dev_null = open(os.devnull, 'w')
 686                 subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
 687             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 688             manifest.append(etree.fromstring(
 689                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 690         rmtree(tmpdir)
 691         if cwd is not None:
 692             os.chdir(cwd)
 693     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 694                  xml_declaration=True, encoding="utf-8"))
 695     title = document.book_info.title
 696     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 697     for st in attributes:
 698         meta = toc_file.makeelement(NCXNS('meta'))
 699         meta.set('name', st)
 700         meta.set('content', '0')
 701         toc_file[0].append(meta)
 702     toc_file[0][0].set('content', str(document.book_info.url))
 703     toc_file[0][1].set('content', str(toc.depth()))
 704     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 705
 706     # write TOC
 707     if html_toc:
 708         toc.add(u"Spis treści", "toc.html", index=1)
 709         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 710     toc.write_to_xml(nav_map)
 711     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 712                  xml_declaration=True, encoding="utf-8"))
 713     zip.close()
 714
 715     return OutputFile.from_filename(output_file.name)