librarian/epub.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from __future__ import with_statement
   7
   8 import os
   9 import os.path
  10 import re
  11 import subprocess
  12 from StringIO import StringIO
  13 from copy import deepcopy
  14 from mimetypes import guess_type
  15
  16 from lxml import etree
  17 import zipfile
  18 from tempfile import mkdtemp, NamedTemporaryFile
  19 from shutil import rmtree
  20
  21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
  22 from librarian.cover import make_cover
  23
  24 from librarian import functions, get_resource
  25
  26 from librarian.hyphenator import Hyphenator
  27
# Import-time side effect: run two registration helpers from
# librarian.functions (presumably they register the extension functions the
# EPUB stylesheets call -- confirm in librarian/functions.py).
functions.reg_person_name()
functions.reg_lang_code_3to2()
  30
  31
  32 def set_hyph_language(source_tree):
  33     def get_short_lng_code(text):
  34         result = ''
  35         text = ''.join(text)
  36         with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
  37             for line in f:
  38                 list = line.strip().split('|')
  39                 if list[0] == text:
  40                     result = list[2]
  41         if result == '':
  42             return text
  43         else:
  44             return result
  45     bibl_lng = etree.XPath('//dc:language//text()',
  46                            namespaces={'dc': str(DCNS)})(source_tree)
  47     short_lng = get_short_lng_code(bibl_lng[0])
  48     try:
  49         return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
  50                                        short_lng + '.dic'))
  51     except:
  52         pass
  53
  54
  55 def hyphenate_and_fix_conjunctions(source_tree, hyph):
  56     texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
  57     for t in texts:
  58         parent = t.getparent()
  59         if hyph is not None:
  60             newt = ''
  61             wlist = re.compile(r'\w+|[^\w]', re.UNICODE).findall(t)
  62             for w in wlist:
  63                 newt += hyph.inserted(w, u'\u00AD')
  64         else:
  65             newt = t
  66         newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
  67         if t.is_text:
  68             parent.text = newt
  69         elif t.is_tail:
  70             parent.tail = newt
  71
  72
  73 def inner_xml(node):
  74     """ returns node's text and children as a string
  75
  76     >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
  77     x<b>y</b>z
  78     """
  79
  80     nt = node.text if node.text is not None else ''
  81     return ''.join([nt] + [etree.tostring(child) for child in node])
  82
  83
  84 def set_inner_xml(node, text):
  85     """ sets node's text and children from a string
  86
  87     >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
  88     >>> set_inner_xml(e, 'x<b>y</b>z')
  89     >>> print etree.tostring(e)
  90     <a>x<b>y</b>z</a>
  91     """
  92
  93     p = etree.fromstring('<x>%s</x>' % text)
  94     node.text = p.text
  95     node[:] = p[:]
  96
  97
  98 def node_name(node):
  99     """ Find out a node's name
 100
 101     >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
 102     XYZ
 103     """
 104
 105     tempnode = deepcopy(node)
 106
 107     for p in ('pe', 'pa', 'pt', 'pr', 'motyw'):
 108         for e in tempnode.findall('.//%s' % p):
 109             t = e.tail
 110             e.clear()
 111             e.tail = t
 112     etree.strip_tags(tempnode, '*')
 113     return tempnode.text
 114
 115
 116 def xslt(xml, sheet, **kwargs):
 117     if isinstance(xml, etree._Element):
 118         xml = etree.ElementTree(xml)
 119     with open(sheet) as xsltf:
 120         transform = etree.XSLT(etree.parse(xsltf))
 121         params = dict((key, transform.strparam(value)) for key, value in kwargs.iteritems())
 122         return transform(xml, **params)
 123
 124
 125 def replace_characters(node):
 126     def replace_chars(text):
 127         if text is None:
 128             return None
 129         return text.replace(u"\ufeff", u"")\
 130                    .replace("---", u"\u2014")\
 131                    .replace("--", u"\u2013")\
 132                    .replace(",,", u"\u201E")\
 133                    .replace('"', u"\u201D")\
 134                    .replace("'", u"\u2019")
 135     if node.tag in ('uwaga', 'extra'):
 136         t = node.tail
 137         node.clear()
 138         node.tail = t
 139     node.text = replace_chars(node.text)
 140     node.tail = replace_chars(node.tail)
 141     for child in node:
 142         replace_characters(child)
 143
 144
 145 def find_annotations(annotations, source, part_no):
 146     for child in source:
 147         if child.tag in ('pe', 'pa', 'pt', 'pr'):
 148             annotation = deepcopy(child)
 149             number = str(len(annotations) + 1)
 150             annotation.set('number', number)
 151             annotation.set('part', str(part_no))
 152             annotation.tail = ''
 153             annotations.append(annotation)
 154             tail = child.tail
 155             child.clear()
 156             child.tail = tail
 157             child.text = number
 158         if child.tag not in ('extra', 'uwaga'):
 159             find_annotations(annotations, child, part_no)
 160
 161
 162 class Stanza(object):
 163     """
 164     Converts / verse endings into verse elements in a stanza.
 165
 166     Slashes may only occur directly in the stanza. Any slashes in subelements
 167     will be ignored, and the subelements will be put inside verse elements.
 168
 169     >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
 170     >>> Stanza(s).versify()
 171     >>> print etree.tostring(s)
 172     <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
 173     y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>
 174
 175     """
 176     def __init__(self, stanza_elem):
 177         self.stanza = stanza_elem
 178         self.verses = []
 179         self.open_verse = None
 180
 181     def versify(self):
 182         self.push_text(self.stanza.text)
 183         for elem in self.stanza:
 184             self.push_elem(elem)
 185             self.push_text(elem.tail)
 186         tail = self.stanza.tail
 187         self.stanza.clear()
 188         self.stanza.tail = tail
 189         self.stanza.extend(self.verses)
 190
 191     def open_normal_verse(self):
 192         self.open_verse = self.stanza.makeelement("wers_normalny")
 193         self.verses.append(self.open_verse)
 194
 195     def get_open_verse(self):
 196         if self.open_verse is None:
 197             self.open_normal_verse()
 198         return self.open_verse
 199
 200     def push_text(self, text):
 201         if not text:
 202             return
 203         for i, verse_text in enumerate(re.split(r"/\s*\n", text)):
 204             if i:
 205                 self.open_normal_verse()
 206             if not verse_text.strip():
 207                 continue
 208             verse = self.get_open_verse()
 209             if len(verse):
 210                 verse[-1].tail = (verse[-1].tail or "") + verse_text
 211             else:
 212                 verse.text = (verse.text or "") + verse_text
 213
 214     def push_elem(self, elem):
 215         if elem.tag.startswith("wers"):
 216             verse = deepcopy(elem)
 217             verse.tail = None
 218             self.verses.append(verse)
 219             self.open_verse = verse
 220         else:
 221             appended = deepcopy(elem)
 222             appended.tail = None
 223             self.get_open_verse().append(appended)
 224
 225
 226 def replace_by_verse(tree):
 227     """ Find stanzas and create new verses in place of a '/' character """
 228
 229     stanzas = tree.findall('.//' + WLNS('strofa'))
 230     for stanza in stanzas:
 231         Stanza(stanza).versify()
 232
 233
 234 def add_to_manifest(manifest, partno):
 235     """ Adds a node to the manifest section in content.opf file """
 236
 237     partstr = 'part%d' % partno
 238     e = manifest.makeelement(
 239         OPFNS('item'), attrib={'id': partstr, 'href': partstr + '.html',
 240                                'media-type': 'application/xhtml+xml'}
 241     )
 242     manifest.append(e)
 243
 244
 245 def add_to_spine(spine, partno):
 246     """ Adds a node to the spine section in content.opf file """
 247
 248     e = spine.makeelement(OPFNS('itemref'), attrib={'idref': 'part%d' % partno})
 249     spine.append(e)
 250
 251
 252 class TOC(object):
 253     def __init__(self, name=None, part_href=None):
 254         self.children = []
 255         self.name = name
 256         self.part_href = part_href
 257         self.sub_number = None
 258
 259     def add(self, name, part_href, level=0, is_part=True, index=None):
 260         assert level == 0 or index is None
 261         if level > 0 and self.children:
 262             return self.children[-1].add(name, part_href, level - 1, is_part)
 263         else:
 264             t = TOC(name)
 265             t.part_href = part_href
 266             if index is not None:
 267                 self.children.insert(index, t)
 268             else:
 269                 self.children.append(t)
 270             if not is_part:
 271                 t.sub_number = len(self.children) + 1
 272                 return t.sub_number
 273
 274     def append(self, toc):
 275         self.children.append(toc)
 276
 277     def extend(self, toc):
 278         self.children.extend(toc.children)
 279
 280     def depth(self):
 281         if self.children:
 282             return max((c.depth() for c in self.children)) + 1
 283         else:
 284             return 0
 285
 286     def href(self):
 287         src = self.part_href
 288         if self.sub_number is not None:
 289             src += '#sub%d' % self.sub_number
 290         return src
 291
 292     def write_to_xml(self, nav_map, counter=1):
 293         for child in self.children:
 294             nav_point = nav_map.makeelement(NCXNS('navPoint'))
 295             nav_point.set('id', 'NavPoint-%d' % counter)
 296             nav_point.set('playOrder', str(counter))
 297
 298             nav_label = nav_map.makeelement(NCXNS('navLabel'))
 299             text = nav_map.makeelement(NCXNS('text'))
 300             if child.name is not None:
 301                 text.text = re.sub(r'\n', ' ', child.name)
 302             else:
 303                 text.text = child.name
 304             nav_label.append(text)
 305             nav_point.append(nav_label)
 306
 307             content = nav_map.makeelement(NCXNS('content'))
 308             content.set('src', child.href())
 309             nav_point.append(content)
 310             nav_map.append(nav_point)
 311             counter = child.write_to_xml(nav_point, counter + 1)
 312         return counter
 313
 314     def html_part(self, depth=0):
 315         texts = []
 316         for child in self.children:
 317             texts.append(
 318                 "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
 319                 (depth, child.href(), child.name))
 320             texts.append(child.html_part(depth + 1))
 321         return "\n".join(texts)
 322
 323     def html(self):
 324         with open(get_resource('epub/toc.html')) as f:
 325             t = unicode(f.read(), 'utf-8')
 326         return t % self.html_part()
 327
 328
 329 def used_chars(element):
 330     """ Lists characters used in an ETree Element """
 331     chars = set((element.text or '') + (element.tail or ''))
 332     for child in element:
 333         chars = chars.union(used_chars(child))
 334     return chars
 335
 336
 337 def chop(main_text):
 338     """ divide main content of the XML file into chunks """
 339
 340     # prepare a container for each chunk
 341     part_xml = etree.Element('utwor')
 342     etree.SubElement(part_xml, 'master')
 343     main_xml_part = part_xml[0]  # master
 344
 345     last_node_part = False
 346
 347     # the below loop are workaround for a problem with epubs in drama ebooks without acts
 348     is_scene = False
 349     is_act = False
 350     for one_part in main_text:
 351         name = one_part.tag
 352         if name == 'naglowek_scena':
 353             is_scene = True
 354         elif name == 'naglowek_akt':
 355             is_act = True
 356
 357     for one_part in main_text:
 358         name = one_part.tag
 359         if is_act is False and is_scene is True:
 360             if name == 'naglowek_czesc':
 361                 yield part_xml
 362                 last_node_part = True
 363                 main_xml_part[:] = [deepcopy(one_part)]
 364             elif not last_node_part and name == "naglowek_scena":
 365                 yield part_xml
 366                 main_xml_part[:] = [deepcopy(one_part)]
 367             else:
 368                 main_xml_part.append(deepcopy(one_part))
 369                 last_node_part = False
 370         else:
 371             if name == 'naglowek_czesc':
 372                 yield part_xml
 373                 last_node_part = True
 374                 main_xml_part[:] = [deepcopy(one_part)]
 375             elif not last_node_part and name in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 376                 yield part_xml
 377                 main_xml_part[:] = [deepcopy(one_part)]
 378             else:
 379                 main_xml_part.append(deepcopy(one_part))
 380                 last_node_part = False
 381     yield part_xml
 382
 383
 384 def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
 385     """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters """
 386
 387     toc = TOC()
 388     for element in chunk_xml[0]:
 389         if element.tag == "naglowek_czesc":
 390             toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
 391         elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
 392             toc.add(node_name(element), "part%d.html" % chunk_no)
 393         elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
 394             subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
 395             element.set('sub', str(subnumber))
 396     if empty:
 397         if not _empty_html_static:
 398             _empty_html_static.append(open(get_resource('epub/emptyChunk.html')).read())
 399         chars = set()
 400         output_html = _empty_html_static[0]
 401     else:
 402         find_annotations(annotations, chunk_xml, chunk_no)
 403         replace_by_verse(chunk_xml)
 404         html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
 405         chars = used_chars(html_tree.getroot())
 406         output_html = etree.tostring(
 407             html_tree, pretty_print=True, xml_declaration=True,
 408             encoding="utf-8",
 409             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 410                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 411         )
 412     return output_html, toc, chars
 413
 414
 415 def transform(wldoc, verbose=False, style=None, html_toc=False,
 416               sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
 417     """ produces a EPUB file
 418
 419     sample=n: generate sample e-book (with at least n paragraphs)
 420     cover: a cover.Cover factory or True for default
 421     flags: less-advertising, without-fonts, working-copy
 422     """
 423
 424     def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
 425         """ processes one input file and proceeds to its children """
 426
 427         replace_characters(wldoc.edoc.getroot())
 428
 429         hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
 430         hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)
 431
 432         # every input file will have a TOC entry,
 433         # pointing to starting chunk
 434         toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
 435         chars = set()
 436         if first:
 437             # write book title page
 438             html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
 439             chars = used_chars(html_tree.getroot())
 440             zip.writestr(
 441                 'OPS/title.html',
 442                 etree.tostring(
 443                     html_tree, pretty_print=True, xml_declaration=True,
 444                     encoding="utf-8",
 445                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 446                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 447                 )
 448             )
 449             # add a title page TOC entry
 450             toc.add(u"Strona tytułowa", "title.html")
 451         elif wldoc.book_info.parts:
 452             # write title page for every parent
 453             if sample is not None and sample <= 0:
 454                 chars = set()
 455                 html_string = open(get_resource('epub/emptyChunk.html')).read()
 456             else:
 457                 html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
 458                 chars = used_chars(html_tree.getroot())
 459                 html_string = etree.tostring(
 460                     html_tree, pretty_print=True, xml_declaration=True,
 461                     encoding="utf-8",
 462                     doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
 463                             ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 464                 )
 465             zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
 466             add_to_manifest(manifest, chunk_counter)
 467             add_to_spine(spine, chunk_counter)
 468             chunk_counter += 1
 469
 470         if len(wldoc.edoc.getroot()) > 1:
 471             # rdf before style master
 472             main_text = wldoc.edoc.getroot()[1]
 473         else:
 474             # rdf in style master
 475             main_text = wldoc.edoc.getroot()[0]
 476             if main_text.tag == RDFNS('RDF'):
 477                 main_text = None
 478
 479         if main_text is not None:
 480             for chunk_xml in chop(main_text):
 481                 empty = False
 482                 if sample is not None:
 483                     if sample <= 0:
 484                         empty = True
 485                     else:
 486                         sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
 487                 chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)
 488
 489                 toc.extend(chunk_toc)
 490                 chars = chars.union(chunk_chars)
 491                 zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
 492                 add_to_manifest(manifest, chunk_counter)
 493                 add_to_spine(spine, chunk_counter)
 494                 chunk_counter += 1
 495
 496         for child in wldoc.parts():
 497             child_toc, chunk_counter, chunk_chars, sample = transform_file(
 498                 child, chunk_counter, first=False, sample=sample)
 499             toc.append(child_toc)
 500             chars = chars.union(chunk_chars)
 501
 502         return toc, chunk_counter, chars, sample
 503
 504     document = deepcopy(wldoc)
 505     del wldoc
 506
 507     if flags:
 508         for flag in flags:
 509             document.edoc.getroot().set(flag, 'yes')
 510
 511     document.clean_ed_note()
 512     document.clean_ed_note('abstrakt')
 513
 514     # add editors info
 515     editors = document.editors()
 516     if editors:
 517         document.edoc.getroot().set('editors', u', '.join(sorted(
 518             editor.readable() for editor in editors)))
 519     if document.book_info.funders:
 520         document.edoc.getroot().set('funders', u', '.join(
 521             document.book_info.funders))
 522     if document.book_info.thanks:
 523         document.edoc.getroot().set('thanks', document.book_info.thanks)
 524
 525     opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
 526     manifest = opf.find('.//' + OPFNS('manifest'))
 527     guide = opf.find('.//' + OPFNS('guide'))
 528     spine = opf.find('.//' + OPFNS('spine'))
 529
 530     output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
 531     zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)
 532
 533     functions.reg_mathml_epub(zip)
 534
 535     if os.path.isdir(ilustr_path):
 536         for i, filename in enumerate(os.listdir(ilustr_path)):
 537             file_path = os.path.join(ilustr_path, filename)
 538             zip.write(file_path, os.path.join('OPS', filename))
 539             image_id = 'image%s' % i
 540             manifest.append(etree.fromstring(
 541                 '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))
 542
 543     # write static elements
 544     mime = zipfile.ZipInfo()
 545     mime.filename = 'mimetype'
 546     mime.compress_type = zipfile.ZIP_STORED
 547     mime.extra = ''
 548     zip.writestr(mime, 'application/epub+zip')
 549     zip.writestr(
 550         'META-INF/container.xml',
 551         '<?xml version="1.0" ?>'
 552         '<container version="1.0" '
 553         'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
 554         '<rootfiles><rootfile full-path="OPS/content.opf" '
 555         'media-type="application/oebps-package+xml" />'
 556         '</rootfiles></container>'
 557     )
 558     zip.write(get_resource('res/wl-logo-small.png'),
 559               os.path.join('OPS', 'logo_wolnelektury.png'))
 560     zip.write(get_resource('res/jedenprocent.png'),
 561               os.path.join('OPS', 'jedenprocent.png'))
 562     if not style:
 563         style = get_resource('epub/style.css')
 564     zip.write(style, os.path.join('OPS', 'style.css'))
 565
 566     if cover:
 567         if cover is True:
 568             cover = make_cover
 569
 570         cover_file = StringIO()
 571         bound_cover = cover(document.book_info)
 572         bound_cover.save(cover_file)
 573         cover_name = 'cover.%s' % bound_cover.ext()
 574         zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
 575         del cover_file
 576
 577         cover_tree = etree.parse(get_resource('epub/cover.html'))
 578         cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
 579         zip.writestr('OPS/cover.html', etree.tostring(
 580             cover_tree, pretty_print=True, xml_declaration=True,
 581             encoding="utf-8",
 582             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 583                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 584         ))
 585
 586         if bound_cover.uses_dc_cover:
 587             if document.book_info.cover_by:
 588                 document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
 589             if document.book_info.cover_source:
 590                 document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)
 591
 592         manifest.append(etree.fromstring(
 593             '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
 594         manifest.append(etree.fromstring(
 595             '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
 596         spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
 597         opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
 598         guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))
 599
 600     annotations = etree.Element('annotations')
 601
 602     toc_file = etree.fromstring(
 603         '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
 604         '"-//NISO//DTD ncx 2005-1//EN" '
 605         '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
 606         '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
 607         'version="2005-1"><head></head><docTitle></docTitle><navMap>'
 608         '</navMap></ncx>'
 609     )
 610     nav_map = toc_file[-1]
 611
 612     if html_toc:
 613         manifest.append(etree.fromstring(
 614             '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
 615         spine.append(etree.fromstring(
 616             '<itemref idref="html_toc" />'))
 617         guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))
 618
 619     toc, chunk_counter, chars, sample = transform_file(document, sample=sample)
 620
 621     if len(toc.children) < 2:
 622         toc.add(u"Początek utworu", "part1.html")
 623
 624     # Last modifications in container files and EPUB creation
 625     if len(annotations) > 0:
 626         toc.add("Przypisy", "annotations.html")
 627         manifest.append(etree.fromstring(
 628             '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
 629         spine.append(etree.fromstring(
 630             '<itemref idref="annotations" />'))
 631         replace_by_verse(annotations)
 632         html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
 633         chars = chars.union(used_chars(html_tree.getroot()))
 634         zip.writestr('OPS/annotations.html', etree.tostring(
 635             html_tree, pretty_print=True, xml_declaration=True,
 636             encoding="utf-8",
 637             doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 638                     '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 639         ))
 640
 641     toc.add("Wesprzyj Wolne Lektury", "support.html")
 642     manifest.append(etree.fromstring(
 643         '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
 644     spine.append(etree.fromstring(
 645         '<itemref idref="support" />'))
 646     html_string = open(get_resource('epub/support.html')).read()
 647     chars.update(used_chars(etree.fromstring(html_string)))
 648     zip.writestr('OPS/support.html', html_string)
 649
 650     toc.add("Strona redakcyjna", "last.html")
 651     manifest.append(etree.fromstring(
 652         '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
 653     spine.append(etree.fromstring(
 654         '<itemref idref="last" />'))
 655     html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
 656     chars.update(used_chars(html_tree.getroot()))
 657     zip.writestr('OPS/last.html', etree.tostring(
 658         html_tree, pretty_print=True, xml_declaration=True,
 659         encoding="utf-8",
 660         doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
 661                 '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
 662     ))
 663
 664     if not flags or 'without-fonts' not in flags:
 665         # strip fonts
 666         tmpdir = mkdtemp('-librarian-epub')
 667         try:
 668             cwd = os.getcwd()
 669         except OSError:
 670             cwd = None
 671
 672         os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
 673         for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
 674             optimizer_call = ['perl', 'subset.pl', '--chars',
 675                               ''.join(chars).encode('utf-8'),
 676                               get_resource('fonts/' + fname),
 677                               os.path.join(tmpdir, fname)]
 678             env = {"PERL_USE_UNSAFE_INC": "1"}
 679             if verbose:
 680                 print "Running font-optimizer"
 681                 subprocess.check_call(optimizer_call, env=env)
 682             else:
 683                 dev_null = open(os.devnull, 'w')
 684                 subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
 685             zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
 686             manifest.append(etree.fromstring(
 687                 '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
 688         rmtree(tmpdir)
 689         if cwd is not None:
 690             os.chdir(cwd)
 691     zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
 692                  xml_declaration=True, encoding="utf-8"))
 693     title = document.book_info.title
 694     attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
 695     for st in attributes:
 696         meta = toc_file.makeelement(NCXNS('meta'))
 697         meta.set('name', st)
 698         meta.set('content', '0')
 699         toc_file[0].append(meta)
 700     toc_file[0][0].set('content', str(document.book_info.url))
 701     toc_file[0][1].set('content', str(toc.depth()))
 702     set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))
 703
 704     # write TOC
 705     if html_toc:
 706         toc.add(u"Spis treści", "toc.html", index=1)
 707         zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
 708     toc.write_to_xml(nav_map)
 709     zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
 710                  xml_declaration=True, encoding="utf-8"))
 711     zip.close()
 712
 713     return OutputFile.from_filename(output_file.name)