Housekeeping.
[librarian.git] / src / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import print_function, unicode_literals
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from six import BytesIO
13 from copy import deepcopy
14 from mimetypes import guess_type
15
16 from lxml import etree
17 import zipfile
18 from tempfile import mkdtemp, NamedTemporaryFile
19 from shutil import rmtree
20
21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
22 from librarian.cover import make_cover
23
24 from librarian import functions, get_resource
25
26 from librarian.hyphenator import Hyphenator
27
# Import-time side effect: register two helpers from librarian.functions.
# NOTE(review): judging by their names they expose person-name formatting
# and 3-to-2-letter language code conversion, presumably for use from the
# XSLT stylesheets -- confirm against librarian/functions.py.
functions.reg_person_name()
functions.reg_lang_code_3to2()
30
31
def squeeze_whitespace(s):
    """ Collapses every run of whitespace in the bytes `s` into one space """
    whitespace_run = re.compile(b'\\s+')
    return whitespace_run.sub(b' ', s)
34
35
def set_hyph_language(source_tree):
    """ Builds a Hyphenator for the language declared in the document

    The language code is read from the first dc:language text node found
    in `source_tree`, mapped through the bundled ISO 639-2 table and used
    to pick a dictionary file named hyph_<code>.dic.

    Returns the Hyphenator, or None when the document declares no
    language or the Hyphenator could not be created. This is best-effort:
    a None result simply means "do not hyphenate".
    """
    def get_short_lng_code(text):
        """ Maps a language code through the ISO 639-2 table

        Returns the third '|'-separated field of the table line whose
        first field equals `text`; when several lines match, the last one
        wins. Falls back to `text` itself if nothing (or only an empty
        field) was found.
        """
        result = ''
        # XPath text results are lxml "smart strings"; joining yields
        # a plain text object with the same content.
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f.read().decode('latin1').split('\n'):
                fields = line.strip().split('|')
                # The length guard keeps a short or blank line from
                # raising IndexError when its first field matches.
                if fields[0] == text and len(fields) > 2:
                    result = fields[2]
        return result or text

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No language declared: nothing to base a dictionary choice on.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Deliberately best-effort (presumably the usual failure is a
        # missing dictionary file -- confirm against Hyphenator). Catching
        # Exception rather than everything lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
57
58
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Hyphenates the text and glues one-letter words to what follows

    Processes every text node below the second child of the /utwor root.
    When `hyph` is not None, each token (a run of word characters or a
    single non-word character) goes through hyph.inserted() with a soft
    hyphen (U+00AD) as the separator. Afterwards, whitespace following
    a single word character that is itself preceded by whitespace is
    replaced with a no-break space (U+00A0). The tree is modified in
    place.
    """
    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    for node_text in etree.XPath('/utwor/*[2]//text()')(source_tree):
        owner = node_text.getparent()
        if hyph is None:
            processed = node_text
        else:
            processed = ''.join(
                hyph.inserted(token, u'\u00AD')
                for token in tokenizer.findall(node_text)
            )
        processed = re.sub(r'(?<=\s\w)\s+', u'\u00A0', processed)
        # Write the result back to wherever the text node came from.
        if node_text.is_text:
            owner.text = processed
        elif node_text.is_tail:
            owner.tail = processed
75
76
def inner_xml(node):
    """ serializes a node's content (its text and its children) to a string

    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
    x<b>y</b>z
    """

    pieces = [node.text or '']
    for child in node:
        pieces.append(etree.tostring(child, encoding='unicode'))
    return ''.join(pieces)
88
89
def set_inner_xml(node, text):
    """ replaces a node's text and children with the parsed content of a string

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print(etree.tostring(e, encoding='unicode'))
    <a>x<b>y</b>z</a>
    """

    # Wrap the fragment in a throwaway root so it parses as one document.
    wrapper = etree.fromstring('<x>%s</x>' % text)
    node.text = wrapper.text
    node[:] = list(wrapper)
102
103
def node_name(node):
    """ Extracts a node's plain-text name

    Works on a copy of the node: the content of footnote (pe, pa, pt, pr)
    and motyw elements is dropped, while the text following them is kept.
    All remaining markup is then stripped.

    >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
    XYZ
    """

    scratch = deepcopy(node)

    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for unwanted in scratch.findall('.//%s' % tag):
            # clear() would also drop the tail, so put it back afterwards
            kept_tail = unwanted.tail
            unwanted.clear()
            unwanted.tail = kept_tail
    etree.strip_tags(scratch, '*')
    return scratch.text
120
121
def xslt(xml, sheet, **kwargs):
    """ Applies the XSLT stylesheet from the file `sheet` to `xml`

    A bare element is wrapped in an ElementTree first. Keyword arguments
    are passed to the stylesheet as string parameters. Returns the result
    of the transformation.
    """
    if isinstance(xml, etree._Element):
        xml = etree.ElementTree(xml)
    with open(sheet) as sheet_file:
        transform = etree.XSLT(etree.parse(sheet_file))
        params = {}
        for key, value in kwargs.items():
            params[key] = transform.strparam(value)
        return transform(xml, **params)
132
133
def replace_characters(node):
    """ Applies typographic substitutions to a whole subtree, in place

    The text and tail of `node` and all its descendants are rewritten.
    The content of 'uwaga' and 'extra' elements is discarded first;
    only the text following them is kept.
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def replace_chars(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        # clear() would also drop the tail, so put it back afterwards
        saved_tail = node.tail
        node.clear()
        node.tail = saved_tail
    node.text = replace_chars(node.text)
    node.tail = replace_chars(node.tail)
    for child in node:
        replace_characters(child)
152
153
def find_annotations(annotations, source, part_no):
    """ Moves footnotes out of `source` into the `annotations` container

    Each pe/pa/pt/pr element found is copied into `annotations` with
    a running 'number' and the given 'part' attribute; in the source it is
    emptied and left holding only that number as text. 'extra' and
    'uwaga' elements are not searched.
    """
    annotation_tags = ('pe', 'pa', 'pt', 'pr')
    skipped_tags = ('extra', 'uwaga')
    for child in source:
        if child.tag in annotation_tags:
            number = str(len(annotations) + 1)
            extracted = deepcopy(child)
            extracted.set('number', number)
            extracted.set('part', str(part_no))
            extracted.tail = ''
            annotations.append(extracted)
            # Leave a numbered placeholder behind, preserving the text
            # that followed the footnote.
            following_text = child.tail
            child.clear()
            child.tail = following_text
            child.text = number
        if child.tag in skipped_tags:
            continue
        find_annotations(annotations, child, part_no)
169
170
class Stanza(object):
    """
    Splits a stanza into verse elements at the / verse endings.

    Only slashes placed directly in the stanza's own text count. A slash
    inside a subelement is left alone, and the subelement itself ends up
    inside a verse element.

    >>> s = etree.fromstring(
    ...         "<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>"
    ...     )
    >>> Stanza(s).versify()
    >>> print(etree.tostring(s, encoding='unicode', pretty_print=True).strip())
    <strofa>
      <wers_normalny>a <b>c</b><b>c</b></wers_normalny>
      <wers_normalny>b<x>x/
    y</x>c</wers_normalny>
      <wers_normalny>d</wers_normalny>
    </strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verses collected so far, in document order
        self.verses = []
        # the verse currently receiving content, if any
        self.open_verse = None

    def versify(self):
        """ Rebuilds the stanza in place as a sequence of verse elements """
        self.push_text(self.stanza.text)
        for child in self.stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() would also drop the tail, so put it back afterwards
        saved_tail = self.stanza.tail
        self.stanza.clear()
        self.stanza.tail = saved_tail
        # Verses that received neither text nor children are dropped.
        non_empty = [v for v in self.verses if v.text or len(v) > 0]
        self.stanza.extend(non_empty)

    def open_normal_verse(self):
        """ Starts a new wers_normalny verse and makes it the open one """
        verse = self.stanza.makeelement("wers_normalny")
        self.verses.append(verse)
        self.open_verse = verse

    def get_open_verse(self):
        """ Returns the open verse, starting a normal one if there is none """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Distributes a piece of stanza text among verses

        Every "/" followed by optional whitespace and a newline starts
        a new verse. Whitespace-only fragments are not added anywhere.
        """
        if not text:
            return
        fragments = re.split(r"/\s*\n", text)
        for position, fragment in enumerate(fragments):
            if position > 0:
                self.open_normal_verse()
            if fragment.strip():
                verse = self.get_open_verse()
                if len(verse) == 0:
                    verse.text = (verse.text or "") + fragment
                else:
                    # The verse already has children: the text goes after
                    # the last one.
                    last_child = verse[-1]
                    last_child.tail = (last_child.tail or "") + fragment

    def push_elem(self, elem):
        """ Adds a copy of a stanza subelement

        An element whose tag starts with "wers" becomes a verse of its
        own (and the open one); anything else is appended to the open
        verse. The copy never carries the original's tail.
        """
        clone = deepcopy(elem)
        clone.tail = None
        if elem.tag.startswith("wers"):
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
242
243
def replace_by_verse(tree):
    """ Converts the '/' verse endings of every stanza in the tree into verses """

    for stanza in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza).versify()
250
251
def add_to_manifest(manifest, partno):
    """ Appends an item for part number `partno` to the content.opf manifest """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
261
262
def add_to_spine(spine, partno):
    """ Appends an itemref for part number `partno` to the content.opf spine """

    part_id = 'part%d' % partno
    item_ref = spine.makeelement(OPFNS('itemref'), attrib={'idref': part_id})
    spine.append(item_ref)
271
272
class TOC(object):
    """ A node of the table-of-contents tree

    Each node has a display name, the href of the part file it points to,
    an optional sub_number (rendered as a '#sub<N>' fragment) and a list
    of child nodes.
    """
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Adds an entry `level` levels below this node

        While `level` is positive and this node has children, the call is
        delegated to the last child; otherwise the entry is added here,
        at position `index` if given (only allowed with level 0) or at
        the end.

        Returns the entry's sub_number when is_part is false, else None.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                # Computed after the entry was added to the list.
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ Adds `toc` itself as the last child """
        self.children.append(toc)

    def extend(self, toc):
        """ Adds the children of `toc` (not `toc` itself) as children """
        self.children.extend(toc.children)

    def depth(self):
        """ Returns the number of levels below this node (0 for a leaf) """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Returns the link target: part_href plus '#sub<N>' if numbered """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Writes the children as nested NCX navPoint elements

        `counter` is the number given to the first navPoint written (used
        for both its id and playOrder). Returns the next unused number.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Renders the children as indented HTML links, one div per entry

        Entry names are escaped, so a title containing '&', '<' or '>'
        no longer yields malformed markup.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), escape('%s' % child.name)))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Returns the HTML TOC page: the epub/toc.html template filled in """
        with open(get_resource('epub/toc.html'), 'rb') as f:
            t = f.read().decode('utf-8')
        return t % self.html_part()
348
349
def used_chars(element):
    """ Collects the characters used in the text of an element's subtree """
    found = set(element.text or '')
    found.update(element.tail or '')
    for child in element:
        found |= used_chars(child)
    return found
356
357
def chop(main_text):
    """ splits the main content of the XML file into chunks

    Generator. Every yielded value is the SAME <utwor><master>...</master>
    </utwor> container, refilled between yields with copies of the source
    elements, so it must be consumed before the generator is advanced.

    A new chunk starts at every naglowek_czesc and at every chapter-level
    header, unless that header directly follows a naglowek_czesc. The
    container is also yielded before the first header, even if it is
    still empty at that point.
    """

    # one container, reused for each chunk
    part_xml = etree.Element('utwor')
    main_xml_part = etree.SubElement(part_xml, 'master')

    # Workaround for a problem with epubs in drama ebooks without acts:
    # when there are scenes but no acts, scenes are the chunk boundaries.
    tags_present = set(item.tag for item in main_text)
    scenes_without_acts = (
        'naglowek_scena' in tags_present
        and 'naglowek_akt' not in tags_present
    )
    if scenes_without_acts:
        break_tags = ("naglowek_scena",)
    else:
        break_tags = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    after_part_header = False
    for item in main_text:
        tag = item.tag
        if tag == 'naglowek_czesc':
            yield part_xml
            after_part_header = True
            main_xml_part[:] = [deepcopy(item)]
        elif not after_part_header and tag in break_tags:
            yield part_xml
            main_xml_part[:] = [deepcopy(item)]
        else:
            main_xml_part.append(deepcopy(item))
            after_part_header = False
    yield part_xml
407
408
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False,
                    _empty_html_static=[]):
    """
    Transforms one chunk, returns the HTML as bytes, a TOC object
    and a set of used characters.

    chunk_xml: chunk container; the elements of its first child are
        scanned for headers
    chunk_no: number of the chunk, used to build partN.html links
    annotations: container that collects footnotes found in the chunk
    empty: when true, the chunk content is not rendered; the static
        epub/emptyChunk.html page is returned instead, with an empty
        character set (the TOC is still built)
    _empty_html_static: internal cache, not meant to be passed by callers.
        The mutable default is deliberate: it keeps the empty page between
        calls so the file is read only once.
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no,
                                level=1, is_part=False)
            # Stored on the element as its 'sub' attribute.
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # Read as bytes, like the etree.tostring() output of the other
            # branch: the caller squeezes whitespace with a bytes pattern,
            # which raises TypeError for text on Python 3.
            with open(get_resource('epub/emptyChunk.html'), 'rb') as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
444
445
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False,
              ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    wldoc: the source document; a deep copy is processed, so the caller's
        object is left untouched
    verbose: when true, the font optimizer's output is not silenced
    style: path of the CSS file to embed (default: bundled epub/style.css)
    html_toc: when true, an HTML table of contents page is added
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: when true, try to insert soft hyphens into the text
    ilustr_path: directory with illustrations; files referenced by ilustr
        elements are embedded
    output_type: passed to the title and last page stylesheets

    Returns an OutputFile made from the temporary .epub file written.
    """

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children

        Returns a tuple: the file's TOC, the next free chunk number,
        the set of characters used, and the remaining sample budget.
        """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(
            wldoc.edoc.getroot()
        ) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'),
                             outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            html_string = etree.tostring(
                html_tree, pretty_print=True, xml_declaration=True,
                encoding="utf-8",
                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
            )
            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                # Read as bytes: squeeze_whitespace() uses a bytes pattern
                # and would raise TypeError for text on Python 3.
                with open(get_resource('epub/emptyChunk.html'), 'rb') as f:
                    html_string = f.read()
            else:
                html_tree = xslt(wldoc.edoc,
                                 get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"'
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter,
                         squeeze_whitespace(html_string))
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # sample budget used up: emit placeholder chunks
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath(
                            '//strofa|//akap|//akap_cd|//akap_dialog'
                        ))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(
                    chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter,
                             squeeze_whitespace(chunk_html))
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(),
               get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub',
                                     delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        ilustr_elements = set(ilustr.get('src')
                              for ilustr in document.edoc.findall('//ilustr'))
        for i, filename in enumerate(os.listdir(ilustr_path)):
            if filename not in ilustr_elements:
                continue
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (
                    image_id, filename, guess_type(file_path)[0])
            ))

    # write static elements
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = b''
    zip.writestr(mime, b'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        b'<?xml version="1.0" ?>'
        b'<container version="1.0" '
        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        b'<rootfiles><rootfile full-path="OPS/content.opf" '
        b'media-type="application/oebps-package+xml" />'
        b'</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = make_cover

        cover_file = BytesIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by',
                                            document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source',
                                            document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" '
            'media-type="application/xhtml+xml" />'
        ))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (
                cover_name, bound_cover.mime_type()
            )
        ))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring(
            '<meta name="cover" content="cover-image"/>'
        ))
        guide.append(etree.fromstring(
            '<reference href="cover.html" type="cover" title="Okładka"/>'
        ))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        b'"-//NISO//DTD ncx 2005-1//EN" '
        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        b'</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" '
            'media-type="application/xhtml+xml" />'
        ))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring(
            '<reference href="toc.html" type="toc" title="Spis treści"/>'
        ))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" '
            'media-type="application/xhtml+xml" />'
        ))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" '
        'media-type="application/xhtml+xml" />'
    ))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html'), 'rb') as f:
        html_string = f.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" '
        'media-type="application/xhtml+xml" />'
    ))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'),
                     outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    )))

    if not flags or 'without-fonts' not in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        # try/finally: remove the temp dir and restore the working
        # directory even if the optimizer fails.
        try:
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  'font-optimizer'))
            for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
                          'DejaVuSerif-Italic.ttf',
                          'DejaVuSerif-BoldItalic.ttf'):
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                env = {"PERL_USE_UNSAFE_INC": "1"}
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call, env=env)
                else:
                    with open(os.devnull, 'w') as dev_null:
                        subprocess.check_call(optimizer_call, stdout=dev_null,
                                              stderr=dev_null, env=env)
                zip.write(os.path.join(tmpdir, fname),
                          os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" '
                    'media-type="application/x-font-truetype" />'
                    % (fname, fname)
                ))
        finally:
            rmtree(tmpdir)
            if cwd is not None:
                os.chdir(cwd)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = ("dtb:uid", "dtb:depth", "dtb:totalPageCount",
                  "dtb:maxPageNumber")
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # The title is spliced into markup that gets parsed, so it has to be
    # escaped: a raw '&' or '<' would make the parser fail.
    from xml.sax.saxutils import escape
    set_inner_xml(toc_file[1], ''.join(('<text>', escape(title), '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()
    # ZipFile.close() does not close a file object it was handed; close it
    # here (delete=False keeps the file on disk).
    output_file.close()

    return OutputFile.from_filename(output_file.name)