fix epub for weird corner case
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from mimetypes import guess_type
15
16 from lxml import etree
17 import zipfile
18 from tempfile import mkdtemp, NamedTemporaryFile
19 from shutil import rmtree
20
21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
22 from librarian.cover import make_cover
23
24 from librarian import functions, get_resource
25
26 from librarian.hyphenator import Hyphenator
27
# NOTE(review): presumably these register custom extension functions
# (person-name formatting, 3-letter to 2-letter language codes) that the
# stylesheets loaded further down rely on -- confirm in librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
30
31
def set_hyph_language(source_tree):
    """Build a Hyphenator for the language declared in the document.

    The language is read from the first ``dc:language`` text node found in
    ``source_tree``.  It is looked up in the first column of the bundled
    ``res/ISO-639-2_8859-1.txt`` table and replaced by the third column of
    the matching row; when no row matches (or the third column is empty)
    the code is used unchanged.

    Returns a ``Hyphenator`` for ``res/hyph-dictionaries/hyph_<code>.dic``,
    or ``None`` when the document declares no language or the hyphenator
    cannot be created.
    """
    def get_short_lng_code(text):
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                # No early exit: as before, the last matching row wins.
                if len(fields) > 2 and fields[0] == text:
                    result = fields[2]
        if result == '':
            return text
        return result

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No language metadata: hyphenation is best-effort, so skip it
        # instead of failing with an IndexError.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Deliberate best-effort: a missing or unreadable dictionary just
        # disables hyphenation.  Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
53
54
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """Insert soft hyphens and bind one-letter words to the next word.

    Every text node below the second child of the root ``utwor`` element
    is rewritten in place.  When ``hyph`` is not None, each word and each
    non-word character is passed through ``hyph.inserted`` with U+00AD as
    the hyphen.  Afterwards any whitespace run that follows a single word
    character, itself preceded by whitespace, becomes U+00A0.
    """
    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    for node_text in etree.XPath('/utwor/*[2]//text()')(source_tree):
        owner = node_text.getparent()
        if hyph is None:
            processed = node_text
        else:
            processed = ''.join(
                hyph.inserted(token, u'\u00AD')
                for token in tokenizer.findall(node_text))
        processed = re.sub(r'(?<=\s\w)\s+', u'\u00A0', processed)
        # lxml text results know whether they were .text or .tail
        if node_text.is_text:
            owner.text = processed
        elif node_text.is_tail:
            owner.tail = processed
71
72
def inner_xml(node):
    """ Serialize a node's content (leading text plus children) to a string.

    The node's own tag is not included; each child is serialized with
    ``etree.tostring``.

    >>> print(inner_xml(etree.fromstring('<a>x<b>y</b>z</a>')))
    x<b>y</b>z
    """

    pieces = [node.text if node.text is not None else '']
    pieces.extend([etree.tostring(child) for child in node])
    return ''.join(pieces)
82
83
def set_inner_xml(node, text):
    """ Replace a node's text and children with parsed markup.

    ``text`` is wrapped in a throwaway element and parsed, so it must be
    a well-formed XML fragment.

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print(etree.tostring(e))
    <a>x<b>y</b>z</a>
    """

    wrapper = etree.fromstring('<x>%s</x>' % text)
    node.text = wrapper.text
    # slice assignment moves the parsed children under ``node``
    node[:] = list(wrapper)
96
97
def node_name(node):
    """ Return the plain-text name of a node.

    Works on a copy: the content of annotation (``pe``, ``pa``, ``pt``,
    ``pr``) and ``motyw`` descendants is dropped while their tails are
    kept, then all remaining markup is stripped.

    >>> print(node_name(etree.fromstring('<a>X<b>Y</b>Z</a>')))
    XYZ
    """

    clone = deepcopy(node)

    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for hit in clone.findall('.//%s' % tag):
            # clear() also wipes the tail, so save and restore it
            kept_tail = hit.tail
            hit.clear()
            hit.tail = kept_tail
    etree.strip_tags(clone, '*')
    return clone.text
114
115
def xslt(xml, sheet, **kwargs):
    """Apply the stylesheet stored at path ``sheet`` to ``xml``.

    A bare element is wrapped in an ElementTree first.  Keyword arguments
    are passed to the stylesheet as string parameters.  Returns the
    transformation result.
    """
    document = etree.ElementTree(xml) if isinstance(xml, etree._Element) else xml
    with open(sheet) as sheet_file:
        stylesheet = etree.XSLT(etree.parse(sheet_file))
        quoted = {}
        for name, raw in kwargs.items():
            quoted[name] = stylesheet.strparam(raw)
        return stylesheet(document, **quoted)
123
124
def replace_characters(node):
    """Apply typographic substitutions to a tree, in place.

    Removes byte-order marks and turns ``---``, ``--``, ``,,``, ``"`` and
    ``'`` into their typographic counterparts in every text and tail.
    ``uwaga`` and ``extra`` elements are emptied (their tail is kept).
    """
    # Order matters: "---" has to be handled before "--".
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def substitute(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        # clear() also wipes the tail, so save and restore it
        kept_tail = node.tail
        node.clear()
        node.tail = kept_tail
    node.text = substitute(node.text)
    node.tail = substitute(node.tail)
    for sub_node in node:
        replace_characters(sub_node)
143
144
def find_annotations(annotations, source, part_no):
    """Move annotations out of ``source`` into ``annotations``.

    Each ``pe``/``pa``/``pt``/``pr`` element found is copied into
    ``annotations`` with ``number`` (1-based running count) and ``part``
    (``part_no``) attributes; the original is emptied and its text set to
    the number.  ``extra`` and ``uwaga`` subtrees are not searched.
    """
    annotation_tags = ('pe', 'pa', 'pt', 'pr')
    for child in source:
        if child.tag in annotation_tags:
            number = str(len(annotations) + 1)
            extracted = deepcopy(child)
            extracted.set('number', number)
            extracted.set('part', str(part_no))
            extracted.tail = ''
            annotations.append(extracted)
            # leave just the reference number in the running text
            kept_tail = child.tail
            child.clear()
            child.tail = kept_tail
            child.text = number
        if child.tag in ('extra', 'uwaga'):
            continue
        find_annotations(annotations, child, part_no)
160
161
class Stanza(object):
    """
    Splits a stanza into verse elements at ``/`` line endings.

    Only slashes found directly in the stanza's own text and in the tails
    of its children are treated as verse breaks.  Slashes inside child
    elements are left alone; those children are copied whole into the
    verse that is open at that point.  Children whose tag starts with
    ``wers`` become verses themselves.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print(etree.tostring(s))
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verse elements collected so far, in document order
        self.verses = []
        # the verse currently receiving content, if any
        self.open_verse = None

    def versify(self):
        """Rebuild the stanza so that it contains only verse elements."""
        self.push_text(self.stanza.text)
        for child in self.stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() also wipes the tail, so save and restore it
        kept_tail = self.stanza.tail
        self.stanza.clear()
        self.stanza.tail = kept_tail
        self.stanza.extend(self.verses)

    def open_normal_verse(self):
        """Start a new ``wers_normalny`` verse and make it the open one."""
        fresh = self.stanza.makeelement("wers_normalny")
        self.open_verse = fresh
        self.verses.append(fresh)

    def get_open_verse(self):
        """Return the open verse, starting a normal one if there is none."""
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """Append text to the verses, breaking at each slash + newline."""
        if not text:
            return
        fragments = re.split(r"/\s*\n", text)
        for position, fragment in enumerate(fragments):
            if position > 0:
                # every separator closes the current verse
                self.open_normal_verse()
            if fragment.strip():
                target = self.get_open_verse()
                if len(target) == 0:
                    target.text = (target.text or "") + fragment
                else:
                    last_child = target[-1]
                    last_child.tail = (last_child.tail or "") + fragment

    def push_elem(self, elem):
        """Add a copy of a child element, as a verse or inside the open verse."""
        is_verse = elem.tag.startswith("wers")
        clone = deepcopy(elem)
        # the tail is handled separately by push_text
        clone.tail = None
        if is_verse:
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
224
225
def replace_by_verse(tree):
    """ Split every stanza below ``tree`` into verses at '/' line endings """

    for stanza in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza).versify()
232
233
def add_to_manifest(manifest, partno):
    """ Append an XHTML item for part number ``partno`` to the OPF manifest """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
243
244
def add_to_spine(spine, partno):
    """ Append an itemref for part number ``partno`` to the OPF spine """

    attributes = {'idref': 'part%d' % partno}
    item_ref = spine.makeelement(OPFNS('itemref'), attrib=attributes)
    spine.append(item_ref)
250
251
class TOC(object):
    """A table-of-contents node: a label, a link target and child nodes."""

    def __init__(self, name=None, part_href=None):
        # child TOC nodes, in reading order
        self.children = []
        # label shown for this entry (may be None)
        self.name = name
        # file the entry points to
        self.part_href = part_href
        # when set, href() points at the "#sub<N>" anchor inside part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """Add an entry ``level`` levels below this node.

        For ``level > 0`` the entry is delegated to the last child, as long
        as there is one; otherwise it is created directly under this node,
        at position ``index`` if given or at the end.  ``index`` may only
        be combined with ``level == 0``.

        Returns the new entry's sub-number when ``is_part`` is false,
        otherwise None.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)

        t = TOC(name)
        t.part_href = part_href
        if index is not None:
            self.children.insert(index, t)
        else:
            self.children.append(t)
        if not is_part:
            # computed after insertion, so the first sub-entry gets 2
            t.sub_number = len(self.children) + 1
            return t.sub_number

    def append(self, toc):
        """Attach ``toc`` itself as the last child."""
        self.children.append(toc)

    def extend(self, toc):
        """Attach all children of ``toc`` (but not ``toc`` itself)."""
        self.children.extend(toc.children)

    def depth(self):
        """Return the number of levels below this node (0 for a leaf)."""
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """Return the link target, including the sub-anchor if there is one."""
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """Write the children as nested NCX navPoints under ``nav_map``.

        ``counter`` is the id / playOrder of the first navPoint written.
        Returns the next unused counter value.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                # labels are written on a single line
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """Return the children as indented HTML ``div`` links.

        Entry names are XML-escaped, so a name containing ``&`` or ``<``
        no longer produces ill-formed markup.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            # A missing name is still rendered as "None", as before.
            label = child.name if child.name is None else escape(child.name)
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """Return the HTML table of contents built from ``epub/toc.html``."""
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
327
328
def used_chars(element):
    """ Collect the set of characters used in an element's subtree.

    Covers the text and tail of the element and of all its descendants.
    """
    found = set(element.text or '')
    found.update(element.tail or '')
    for sub_element in element:
        found |= used_chars(sub_element)
    return found
335
336
def chop(main_text):
    """ Split the main text into chunks, yielding one container per chunk.

    Each chunk is yielded as an ``utwor`` element holding a single
    ``master`` child with copies of the chunk's nodes.  The same container
    object is reused for every chunk, so it must be consumed before the
    generator is advanced.  The first chunk yielded is empty when the
    text opens with a chunk-starting header.
    """

    # one container, refilled for each chunk
    container = etree.Element('utwor')
    master = etree.SubElement(container, 'master')

    # Workaround for dramas that have scenes but no acts: there the scene
    # headers, not chapter/act/subtitle headers, start new chunks.
    tags = [part.tag for part in main_text]
    has_scenes = 'naglowek_scena' in tags
    has_acts = 'naglowek_akt' in tags
    if has_scenes and not has_acts:
        chunk_starters = ('naglowek_scena',)
    else:
        chunk_starters = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    # True right after a part header, so that a header directly following
    # it stays in the same chunk.
    after_part_header = False
    for node in main_text:
        if node.tag == 'naglowek_czesc':
            yield container
            after_part_header = True
            master[:] = [deepcopy(node)]
        elif not after_part_header and node.tag in chunk_starters:
            yield container
            master[:] = [deepcopy(node)]
        else:
            master.append(deepcopy(node))
            after_part_header = False
    yield container
382
383
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ Transform one chunk into HTML.

    chunk_xml: chunk container; the nodes are the children of its first child
    chunk_no: chunk number, used in the ``part<N>.html`` links
    annotations: element that collects annotations found in the chunk
    empty: if true, skip the transformation and use the empty-chunk
        placeholder page instead (the TOC is still built)
    _empty_html_static: private cache for the placeholder page; the mutable
        default is intentional, it keeps the page between calls

    Returns a HTML string, a TOC object and a set of used characters.
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # the same number is stored on the element as its ``sub`` attribute
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # read once, and close the file instead of leaking the handle
            with open(get_resource('epub/emptyChunk.html')) as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
413
414
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    wldoc: the source document; it is deep-copied, the original is untouched
    verbose: if true, do not silence the font optimizer's output
    style: path to the CSS file to embed (default: the bundled epub/style.css)
    html_toc: if true, also add a HTML table of contents (toc.html)
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: if true, insert soft hyphens for the document's language
    ilustr_path: directory whose files are all added to the e-book
    output_type: passed as ``outputtype`` to the title and last page stylesheets

    Returns an OutputFile for the temporary file the EPUB was written to.
    """

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            epub_zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                with open(get_resource('epub/emptyChunk.html')) as empty_file:
                    html_string = empty_file.read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            epub_zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # sample quota used up: emit placeholder chunks only
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                epub_zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    # work on a private copy, the transformation modifies the tree
    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    # named epub_zip so the builtin zip() is not shadowed
    epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(epub_zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            epub_zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    # the mimetype entry is stored uncompressed
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    epub_zip.writestr(mime, 'application/epub+zip')
    epub_zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    epub_zip.write(get_resource('res/wl-logo-small.png'),
                   os.path.join('OPS', 'logo_wolnelektury.png'))
    epub_zip.write(get_resource('res/jedenprocent.png'),
                   os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    epub_zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = make_cover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        epub_zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        epub_zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        epub_zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html')) as support_file:
        html_string = support_file.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    epub_zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    epub_zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # strip fonts: embed only the glyphs for characters actually used
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        try:
            # subset.pl is run from its own directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call)
                else:
                    with open(os.devnull, 'w') as dev_null:
                        subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
                epub_zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # Restore the working directory and drop the temporary fonts
            # even if the optimizer fails.
            if cwd is not None:
                os.chdir(cwd)
            rmtree(tmpdir)
    epub_zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # Build the docTitle label as an element rather than by interpolating
    # the title into markup, so that a title containing "&" or "<" is
    # escaped by the serializer instead of failing to parse.
    doc_title = toc_file[1]
    title_text = toc_file.makeelement(NCXNS('text'))
    title_text.text = title
    doc_title.text = None
    doc_title[:] = [title_text]

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        epub_zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    epub_zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    epub_zip.close()
    # the temporary file was created with delete=False, so closing keeps it
    output_file.close()

    return OutputFile.from_filename(output_file.name)