Fix XML entities left from MathML.
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import print_function, unicode_literals
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from six import BytesIO
13 from copy import deepcopy
14 from mimetypes import guess_type
15
16 from lxml import etree
17 import zipfile
18 from tempfile import mkdtemp, NamedTemporaryFile
19 from shutil import rmtree
20
21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
22 from librarian.cover import make_cover
23
24 from librarian import functions, get_resource
25
26 from librarian.hyphenator import Hyphenator
27
# Import-time side effect: run the registration hooks from librarian.functions.
# NOTE(review): presumably these expose person-name and 3-to-2-letter
# language-code helpers to the XSLT sheets used below -- confirm in
# librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
30
31
def squeeze_whitespace(s):
    """ Collapse every run of ASCII whitespace in `s` into a single space.

    Accepts a byte string or a text string and returns the same type.
    Callers in this module pass both: serialized XML (bytes) and the
    contents of HTML resources read with a plain open().read().

    Only ASCII whitespace is squeezed in either case, so typographic
    characters such as U+00A0 (no-break space) survive untouched.
    """
    if isinstance(s, bytes):
        # In a bytes pattern \s already means ASCII whitespace only.
        return re.sub(b'\\s+', b' ', s)
    # Spell the ASCII class out: a text-mode \s may also match Unicode
    # spaces, which must not be collapsed.
    return re.sub(r'[ \t\n\r\f\v]+', ' ', s)
34
35
def set_hyph_language(source_tree):
    """ Build a Hyphenator for the language declared in the document.

    The first text node found under dc:language is translated through
    the bundled ISO 639-2 table and used to pick a dictionary file named
    'res/hyph-dictionaries/hyph_<code>.dic'.

    Returns the Hyphenator, or None when the document declares no
    language or the dictionary cannot be loaded (best effort: the caller
    treats None as "do not hyphenate").
    """
    def get_short_lng_code(text):
        """ Return the third '|'-separated field of the table row whose
        first field equals `text`; fall back to `text` itself. """
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f.read().decode('latin1').split('\n'):
                fields = line.strip().split('|')
                # Blank or truncated rows have no third field; skip them
                # instead of failing with IndexError.
                if len(fields) > 2 and fields[0] == text:
                    result = fields[2]
        return result or text

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No dc:language at all: nothing to choose a dictionary by.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Deliberately best effort, but no longer swallows
        # KeyboardInterrupt / SystemExit as the bare except did.
        return None
57
58
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Rewrite every text node below the second child of /utwor.

    When `hyph` is given, each token (a run of word characters or a
    single non-word character) is passed through hyph.inserted() with
    U+00AD as the separator.  Afterwards, whitespace that follows a
    one-character word (itself preceded by whitespace) is replaced by
    U+00A0.  The result is stored back as the owning element's text or
    tail.
    """
    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    for node_text in etree.XPath('/utwor/*[2]//text()')(source_tree):
        owner = node_text.getparent()
        if hyph is None:
            processed = node_text
        else:
            processed = ''.join(
                hyph.inserted(token, u'\u00AD')
                for token in tokenizer.findall(node_text))
        processed = re.sub(r'(?<=\s\w)\s+', u'\u00A0', processed)
        if node_text.is_text:
            owner.text = processed
        elif node_text.is_tail:
            owner.tail = processed
75
76
def inner_xml(node):
    """ Serialize the content of a node (leading text plus all children,
    each with its tail) to a string, leaving out the node's own tag.

    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
    x<b>y</b>z
    """

    leading = '' if node.text is None else node.text
    serialized = [etree.tostring(child, encoding='unicode') for child in node]
    return leading + ''.join(serialized)
86
87
def set_inner_xml(node, text):
    """ Replace the content of a node (its text and all children) with
    the result of parsing `text` as an XML fragment.

    The fragment is wrapped in a throw-away <x> element for parsing, so
    it must be well-formed once wrapped.

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print(etree.tostring(e, encoding='unicode'))
    <a>x<b>y</b>z</a>
    """

    wrapper = etree.fromstring('<x>%s</x>' % text)
    new_children = wrapper[:]
    node.text = wrapper.text
    node[:] = new_children
100
101
def node_name(node):
    """ Return the plain text of a node.

    Works on a copy, so the argument is left untouched.  The content of
    pe/pa/pt/pr/motyw descendants is dropped (their tails are kept), and
    all remaining markup is stripped.

    >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
    XYZ
    """

    def blank_keeping_tail(element):
        # The tail is saved and restored around clear().
        kept_tail = element.tail
        element.clear()
        element.tail = kept_tail

    scratch = deepcopy(node)
    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for found in scratch.findall('.//%s' % tag):
            blank_keeping_tail(found)
    etree.strip_tags(scratch, '*')
    return scratch.text
118
119
def xslt(xml, sheet, **kwargs):
    """ Apply the XSLT stylesheet stored at path `sheet` to `xml`.

    `xml` may be an element (it is wrapped in an ElementTree first) or
    anything the compiled transform accepts.  Keyword arguments are
    handed to the stylesheet as string parameters.  Returns the
    transform result.
    """
    document = etree.ElementTree(xml) if isinstance(xml, etree._Element) else xml
    with open(sheet) as stylesheet_file:
        transform = etree.XSLT(etree.parse(stylesheet_file))
        params = {}
        for key, value in kwargs.items():
            params[key] = transform.strparam(value)
        return transform(document, **params)
127
128
def replace_characters(node):
    """ Recursively apply typographic substitutions to a tree, in place.

    'uwaga' and 'extra' elements are emptied first (their tail is kept).
    Text and tail of every element then get: BOM removed, '---' and '--'
    turned into em and en dashes, ',,' into a low opening quote, '"'
    into a closing quote and "'" into a typographic apostrophe.
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def replace_chars(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        saved_tail = node.tail
        node.clear()
        node.tail = saved_tail
    node.text = replace_chars(node.text)
    node.tail = replace_chars(node.tail)
    for child in node:
        replace_characters(child)
147
148
def find_annotations(annotations, source, part_no):
    """ Collect footnotes (pe/pa/pt/pr) found below `source`.

    A copy of each footnote, tagged with a running 'number' and the
    'part' it came from, is appended to `annotations`.  The footnote in
    the text is reduced to just that number (its tail is kept).
    Subtrees of 'extra' and 'uwaga' elements are not searched.
    """
    footnote_tags = ('pe', 'pa', 'pt', 'pr')
    skipped_tags = ('extra', 'uwaga')
    for child in source:
        if child.tag in footnote_tags:
            number = str(len(annotations) + 1)
            note = deepcopy(child)
            note.set('number', number)
            note.set('part', str(part_no))
            note.tail = ''
            annotations.append(note)
            # Empty the in-text footnote but keep what follows it.
            kept_tail = child.tail
            child.clear()
            child.tail = kept_tail
            child.text = number
        if child.tag in skipped_tags:
            continue
        find_annotations(annotations, child, part_no)
164
165
class Stanza(object):
    """
    Splits a stanza into verse elements at every '/' line ending.

    Only slashes in the stanza's own text count. Slashes inside
    subelements are left alone, and the subelements are placed whole
    inside the verse that is open at that point.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print(etree.tostring(s, encoding='unicode'))
    <strofa><wers_normalny>a <b>c</b><b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verses built so far, in document order
        self.verses = []
        # verse currently receiving content; None until one is needed
        self.open_verse = None

    def versify(self):
        """ Rebuild the stanza in place as a sequence of verses. """
        self.push_text(self.stanza.text)
        for elem in self.stanza:
            self.push_elem(elem)
            self.push_text(elem.tail)
        kept_tail = self.stanza.tail
        self.stanza.clear()
        self.stanza.tail = kept_tail
        # Verses that received neither text nor children are dropped.
        filled = [verse for verse in self.verses if verse.text or len(verse) > 0]
        self.stanza.extend(filled)

    def open_normal_verse(self):
        """ Start a fresh <wers_normalny> and make it the open verse. """
        fresh = self.stanza.makeelement("wers_normalny")
        self.open_verse = fresh
        self.verses.append(fresh)

    def get_open_verse(self):
        """ Return the open verse, starting one if there is none. """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Distribute text over verses, breaking at '/' + newline. """
        if not text:
            return
        for position, fragment in enumerate(re.split(r"/\s*\n", text)):
            if position > 0:
                # every separator found starts a new verse
                self.open_normal_verse()
            if fragment.strip():
                target = self.get_open_verse()
                if len(target):
                    last_child = target[-1]
                    last_child.tail = (last_child.tail or "") + fragment
                else:
                    target.text = (target.text or "") + fragment

    def push_elem(self, elem):
        """ Add a copy of a child: 'wers*' elements become verses of
        their own, anything else goes into the open verse. """
        clone = deepcopy(elem)
        clone.tail = None
        if elem.tag.startswith("wers"):
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
228
229
def replace_by_verse(tree):
    """ Versify, in place, every stanza found anywhere below `tree`:
    '/' line endings become separate verse elements (see Stanza). """

    for stanza_elem in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza_elem).versify()
236
237
def add_to_manifest(manifest, partno):
    """ Append the <item> entry for chunk number `partno`
    (id 'partN', file 'partN.html') to the content.opf manifest. """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
247
248
def add_to_spine(spine, partno):
    """ Append the <itemref> pointing at chunk number `partno`
    (idref 'partN') to the content.opf spine. """

    part_id = 'part%d' % partno
    itemref = spine.makeelement(OPFNS('itemref'), attrib={'idref': part_id})
    spine.append(itemref)
254
255
class TOC(object):
    """ A node of the table-of-contents tree.

    Each node has a display name, the href of the file it lives in and,
    for entries that are not separate files, a sub_number used as the
    '#subN' fragment.  The root node only serves as a container.
    """
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Add an entry `level` steps below this node.

        For level > 0 the entry goes under the most recently added
        child; if there is no child yet it is added right here.
        `index` (top level only) inserts at a position instead of
        appending.  For is_part=False the new entry gets a sub_number,
        which is returned; otherwise None is returned.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ Add a whole TOC node as the last child. """
        self.children.append(toc)

    def extend(self, toc):
        """ Adopt all children of another TOC node. """
        self.children.extend(toc.children)

    def depth(self):
        """ Number of levels below this node (0 for a leaf). """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Link target: the part file, plus '#subN' for sub-entries. """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Append navPoint elements for all descendants to `nav_map`.

        `counter` is the first id / playOrder to use; the next free
        value is returned.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            # Labels are single-line; a missing name stays None.
            if child.name is not None:
                text.text = re.sub(r'\n', ' ', child.name)
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Render all descendants as indented <div><a> lines.

        Names and hrefs are XML-escaped, so a title containing '&' or
        '<' no longer produces a malformed toc.html.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            # '%s' keeps the old rendering of a missing name or href.
            href = escape('%s' % child.href(), {"'": "&#39;"})
            label = escape('%s' % child.name)
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, href, label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Fill the epub/toc.html template with the rendered entries. """
        with open(get_resource('epub/toc.html'), 'rb') as f:
            t = f.read().decode('utf-8')
        return t % self.html_part()
331
332
def used_chars(element):
    """ Return the set of characters occurring in the text and tail of
    an element and of all its descendants. """
    found = set(element.text or '')
    found.update(element.tail or '')
    for child in element:
        found |= used_chars(child)
    return found
339
340
def chop(main_text):
    """ Divide the main content of the XML file into chunks.

    This is a generator.  Every chunk is yielded as the SAME
    <utwor><master>...</master></utwor> element, refilled between
    yields, so each one must be consumed before asking for the next.

    A new chunk starts at every 'naglowek_czesc' and, unless the
    previous element was such a part header, at every chapter-level
    header.  The first chunk yielded is empty when the text begins with
    a header.
    """

    # one reusable container for all chunks
    part_xml = etree.Element('utwor')
    main_xml_part = etree.SubElement(part_xml, 'master')

    # Workaround for dramas without acts: when there are scenes but no
    # acts, scenes (and only scenes) act as the chapter-level header.
    tags = [one_part.tag for one_part in main_text]
    has_scene = 'naglowek_scena' in tags
    has_act = 'naglowek_akt' in tags
    if has_scene and not has_act:
        chunk_openers = ("naglowek_scena",)
    else:
        chunk_openers = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    last_node_part = False
    for one_part in main_text:
        name = one_part.tag
        if name == 'naglowek_czesc':
            yield part_xml
            last_node_part = True
            main_xml_part[:] = [deepcopy(one_part)]
        elif not last_node_part and name in chunk_openers:
            yield part_xml
            main_xml_part[:] = [deepcopy(one_part)]
        else:
            main_xml_part.append(deepcopy(one_part))
            last_node_part = False
    yield part_xml
386
387
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ Transform one chunk into an HTML page.

    chunk_xml: a chunk as produced by chop()
    chunk_no: number of the chunk, used for the 'partN.html' hrefs
    annotations: element collecting footnotes found in the chunk
    empty: emit the placeholder page instead of the real content
        (headers still get their TOC entries)
    _empty_html_static: private cache for the placeholder page; the
        mutable default is intentional, so the file is read only once

    Returns (HTML as bytes, TOC object, set of used characters).
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # remember the anchor number on the element for the stylesheet
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # Read as bytes, like the serialized XML of the other
            # branch, and close the file right away.
            with open(get_resource('epub/emptyChunk.html'), 'rb') as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
417
418
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    wldoc: the source document; it is deep-copied, the original is not
        modified
    verbose: let the font optimizer write to stdout/stderr
    style: path to a CSS file; defaults to the bundled epub/style.css
    html_toc: additionally include an HTML table of contents (toc.html)
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
        (each flag is set to 'yes' on the root element; 'without-fonts'
        also skips font subsetting)
    hyphenate: insert soft hyphens using a dictionary for the
        document's language
    ilustr_path: directory with illustrations; files referenced by an
        <ilustr src=...> are packed into the book
    output_type: passed as 'outputtype' to the title and last-page
        stylesheets

    Returns an OutputFile wrapping a temporary .epub file.
    """
    from xml.sax.saxutils import escape

    def serialize_xhtml(tree):
        """ serializes a page as UTF-8 XHTML 1.1 with XML declaration """
        return etree.tostring(
            tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children

        Returns (TOC, next free chunk number, used characters,
        remaining sample budget).
        """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            html_string = serialize_xhtml(html_tree)
            epub_zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                # bytes, like the serialized pages; closed right away
                with open(get_resource('epub/emptyChunk.html'), 'rb') as f:
                    html_string = f.read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = serialize_xhtml(html_tree)
            epub_zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                epub_zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    # named epub_zip so the builtin zip() is not shadowed
    epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(epub_zip)

    if os.path.isdir(ilustr_path):
        ilustr_elements = set(ilustr.get('src') for ilustr in document.edoc.findall('//ilustr'))
        for i, filename in enumerate(os.listdir(ilustr_path)):
            if filename not in ilustr_elements:
                continue
            file_path = os.path.join(ilustr_path, filename)
            epub_zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    # the mimetype entry is stored uncompressed
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = b''
    epub_zip.writestr(mime, b'application/epub+zip')
    epub_zip.writestr(
        'META-INF/container.xml',
        b'<?xml version="1.0" ?>'
        b'<container version="1.0" '
        b'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        b'<rootfiles><rootfile full-path="OPS/content.opf" '
        b'media-type="application/oebps-package+xml" />'
        b'</rootfiles></container>'
    )
    epub_zip.write(get_resource('res/wl-logo-small.png'),
                   os.path.join('OPS', 'logo_wolnelektury.png'))
    epub_zip.write(get_resource('res/jedenprocent.png'),
                   os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    epub_zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = make_cover

        cover_file = BytesIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        epub_zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        epub_zip.writestr('OPS/cover.html', serialize_xhtml(cover_tree))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    # filled by transform_chunk() through transform_file()
    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        b'<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        b'"-//NISO//DTD ncx 2005-1//EN" '
        b'"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        b'<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        b'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        b'</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        epub_zip.writestr('OPS/annotations.html', serialize_xhtml(html_tree))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html'), 'rb') as f:
        html_string = f.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    epub_zip.writestr('OPS/support.html', squeeze_whitespace(html_string))

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    epub_zip.writestr('OPS/last.html', squeeze_whitespace(serialize_xhtml(html_tree)))

    if not flags or 'without-fonts' not in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        try:
            # subset.pl is run from inside its own directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                env = {"PERL_USE_UNSAFE_INC": "1"}
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call, env=env)
                else:
                    with open(os.devnull, 'w') as dev_null:
                        subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
                epub_zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # Restore the working directory and remove the scratch
            # directory even when the optimizer fails.
            if cwd is not None:
                os.chdir(cwd)
            rmtree(tmpdir)
    epub_zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # The title is plain text spliced into markup: escape it so that
    # '&' or '<' in a title cannot break the XML parse.
    set_inner_xml(toc_file[1], ''.join(('<text>', escape(title), '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        epub_zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    epub_zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    epub_zip.close()
    # Close the temp file handle explicitly; the file itself stays on
    # disk (delete=False) and is handed over by name.
    output_file.close()

    return OutputFile.from_filename(output_file.name)