turn off hyphenator
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from lxml import etree
15 import zipfile
16 from tempfile import mkdtemp, NamedTemporaryFile
17 from shutil import rmtree
18
19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
20 from librarian.cover import DefaultEbookCover
21
22 from librarian import functions, get_resource
23
24 from librarian.hyphenator import Hyphenator
25
# Import-time side effect: call two registration helpers from
# librarian.functions.
# NOTE(review): presumably these register the person-name and 3-to-2 letter
# language-code extension functions used by the XSLT stylesheets -- confirm
# in librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
28
29
def set_hyph_language(source_tree):
    """ Build a Hyphenator for the language declared in the document.

    The first ``dc:language`` text node found in ``source_tree`` is mapped
    through the ISO-639-2 table shipped as a resource, and the dictionary
    ``res/hyph-dictionaries/hyph_<code>.dic`` is loaded for the result.

    Returns the Hyphenator, or None when the document declares no language
    or the dictionary cannot be loaded; callers treat None as
    "do not hyphenate".
    """
    def get_short_lng_code(text):
        """ Translate a language code using the ISO-639-2 resource table.

        Returns the third '|'-separated field of the row whose first field
        equals ``text`` (the last such row wins); falls back to ``text``
        itself when no row matches or the mapped field is empty.
        """
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                if fields[0] == text:
                    result = fields[2]
        if result == '':
            return text
        return result

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No language metadata: nothing to pick a dictionary for.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Best effort: a missing or unreadable dictionary disables
        # hyphenation, but KeyboardInterrupt/SystemExit still propagate.
        return None
51
52
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Hyphenate text nodes in place and glue one-letter words forward.

    Does nothing when ``hyph`` is None. Otherwise every text node under the
    second child of the root ``utwor`` element is split into word and
    non-word tokens, each token is passed through
    ``hyph.inserted(token, U+00AD)``, and any whitespace run that follows a
    single word character (itself preceded by whitespace) is replaced by a
    no-break space (U+00A0). The result is written back as the owning
    element's text or tail.
    """
    if hyph is None:
        return

    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    for fragment in etree.XPath('/utwor/*[2]//text()')(source_tree):
        owner = fragment.getparent()
        hyphenated = ''.join(
            hyph.inserted(token, u'\u00AD')
            for token in tokenizer.findall(fragment))
        glued = re.sub(r'(?<=\s\w)\s+', u'\u00A0', hyphenated)
        if fragment.is_text:
            owner.text = glued
        elif fragment.is_tail:
            owner.tail = glued
67
68
def inner_xml(node):
    """ Serialize a node's content (leading text plus children) to a string.

    The node's own tag is not included; each child is serialized with
    etree.tostring.

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    pieces = ['' if node.text is None else node.text]
    for child in node:
        pieces.append(etree.tostring(child))
    return ''.join(pieces)
78
79
def set_inner_xml(node, text):
    """ Replace a node's text and children with the parsed XML fragment.

    ``text`` is wrapped in a temporary <x> element so that it can be parsed
    as a well-formed document; the wrapper's text and children are then
    moved into ``node``.

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    markup = '<x>%s</x>' % text
    wrapper = etree.fromstring(markup)
    node.text = wrapper.text
    node[:] = wrapper[:]
92
93
def node_name(node):
    """ Return the plain-text name of a node.

    Works on a copy: annotation and theme elements (pe, pa, pt, pr, motyw)
    are emptied while their tails are kept, then all remaining markup is
    stripped and the resulting text is returned.

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    scratch = deepcopy(node)

    for ignored_tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for hit in scratch.findall('.//%s' % ignored_tag):
            # clear() also drops the tail, which belongs to the surrounding
            # text, so save and restore it.
            kept_tail = hit.tail
            hit.clear()
            hit.tail = kept_tail
    etree.strip_tags(scratch, '*')
    return scratch.text
110
111
def xslt(xml, sheet):
    """ Apply the XSLT stylesheet stored at path ``sheet`` to ``xml``.

    ``xml`` may be an element (it is wrapped in an ElementTree first) or an
    object that already offers an ``xslt`` method. Returns the result of
    that ``xslt`` call.
    """
    tree = etree.ElementTree(xml) if isinstance(xml, etree._Element) else xml
    with open(sheet) as stylesheet_file:
        stylesheet = etree.parse(stylesheet_file)
        return tree.xslt(stylesheet)
117
118
def replace_characters(node):
    """ Apply typographic substitutions to a whole subtree, in place.

    ``uwaga`` and ``extra`` elements are emptied first (their tail is kept).
    Then, in the text and tail of every element: the BOM character is
    removed, ``---`` and ``--`` become em and en dashes, ``,,`` becomes a
    low opening quote, ``"`` a right double quote and ``'`` a right single
    quote.
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def replace_chars(text):
        if text is None:
            return None
        for plain, fancy in substitutions:
            text = text.replace(plain, fancy)
        return text

    if node.tag in ('uwaga', 'extra'):
        kept_tail = node.tail
        node.clear()
        node.tail = kept_tail
    node.text = replace_chars(node.text)
    node.tail = replace_chars(node.tail)
    for child in node:
        replace_characters(child)
137
138
def find_annotations(annotations, source, part_no):
    """ Move annotations out of ``source`` into ``annotations``.

    Walks ``source`` recursively, without descending into ``extra`` and
    ``uwaga`` elements. A copy of every pe/pa/pt/pr element is appended to
    ``annotations`` with ``number`` (1-based position in ``annotations``)
    and ``part`` (``part_no``) attributes and an empty tail. The original
    element is emptied, keeps its tail, and gets the number as its text.
    """
    for child in source:
        if child.tag in ('pe', 'pa', 'pt', 'pr'):
            number = str(len(annotations) + 1)
            note = deepcopy(child)
            note.set('number', number)
            note.set('part', str(part_no))
            note.tail = ''
            annotations.append(note)
            # Leave only the reference number in the running text.
            kept_tail = child.tail
            child.clear()
            child.tail = kept_tail
            child.text = number
        if child.tag in ('extra', 'uwaga'):
            continue
        find_annotations(annotations, child, part_no)
154
155
class Stanza(object):
    """
    Splits a stanza into verse elements at "/"-terminated line ends.

    Only slashes in the stanza's own text (and in the tails of its direct
    children) count. Slashes inside subelements are left alone, and the
    subelements themselves are placed inside verse elements.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verses collected so far, in document order
        self.verses = []
        # verse currently receiving content; None until one is started
        self.open_verse = None

    def versify(self):
        """ Rebuild the stanza in place as a sequence of verse elements. """
        stanza = self.stanza
        self.push_text(stanza.text)
        for child in stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() wipes the tail as well, so carry it over.
        kept_tail = stanza.tail
        stanza.clear()
        stanza.tail = kept_tail
        stanza.extend(self.verses)

    def open_normal_verse(self):
        """ Start a new ``wers_normalny`` verse and make it the open one. """
        fresh = self.stanza.makeelement("wers_normalny")
        self.open_verse = fresh
        self.verses.append(fresh)

    def get_open_verse(self):
        """ Return the open verse, starting a normal one if none is open. """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Append text to the open verse, starting a new verse after each
        slash followed by optional whitespace and a newline. """
        if not text:
            return
        fragments = re.split(r"/\s*\n", text)
        for position, fragment in enumerate(fragments):
            if position > 0:
                self.open_normal_verse()
            verse = self.get_open_verse()
            if len(verse) > 0:
                # Text after the verse's last child belongs in its tail.
                last_child = verse[-1]
                last_child.tail = (last_child.tail or "") + fragment
            else:
                verse.text = (verse.text or "") + fragment

    def push_elem(self, elem):
        """ Add a copy of a stanza child: elements whose tag starts with
        "wers" become the open verse, anything else goes into the open
        verse. The copy's tail is dropped. """
        clone = deepcopy(elem)
        clone.tail = None
        if elem.tag.startswith("wers"):
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
216
217
def replace_by_verse(tree):
    """ Versify every stanza below ``tree``.

    Each descendant ``strofa`` element is rewritten in place by
    Stanza.versify, which turns '/' line endings into verse elements.
    """

    for stanza in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza).versify()
224
225
def add_to_manifest(manifest, partno):
    """ Append an <item> for chunk ``partno`` to the content.opf manifest.

    The item gets id ``part<N>``, href ``part<N>.html`` and the XHTML media
    type.
    """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
235
236
def add_to_spine(spine, partno):
    """ Append an <itemref> pointing at ``part<N>`` to the content.opf spine. """

    attributes = {'idref': 'part%d' % partno}
    spine.append(spine.makeelement(OPFNS('itemref'), attrib=attributes))
242
243
class TOC(object):
    """ A node in the table-of-contents tree.

    Each node has a display ``name``, the ``part_href`` of the file it
    points to, an optional ``sub_number`` (rendered as a ``#sub<N>``
    anchor) and an ordered list of child nodes.
    """
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Add an entry ``level`` steps below this node.

        While ``level`` is positive and there are children, the call is
        delegated to the last child with ``level - 1``; otherwise the entry
        is created here, inserted at ``index`` if given or appended.
        ``index`` may only be used with ``level == 0``.

        For ``is_part=False`` the new entry gets a sub-number (number of
        siblings after insertion, plus one) which is also returned; for
        parts nothing is returned.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ Attach ``toc`` itself as the last child. """
        self.children.append(toc)

    def extend(self, toc):
        """ Attach all children of ``toc`` (not ``toc`` itself). """
        self.children.extend(toc.children)

    def depth(self):
        """ Return the number of levels below this node (0 for a leaf). """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Return the link target, with a ``#sub<N>`` anchor if numbered. """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Write the children as nested NCX navPoint elements under
        ``nav_map``.

        ``counter`` numbers the navPoints (id and playOrder) in document
        order; the next unused value is returned.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                # labels are single-line
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Return the HTML fragment listing all descendants.

        Every entry is a <div> indented by its depth (in em) holding a link
        to the entry. Names are escaped for use as XHTML text, so titles
        containing ``&``, ``<`` or ``>`` do not break the document; an entry
        without a name is rendered with an empty label.
        """
        # Function-scope import keeps this block self-contained.
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            label = escape(child.name) if child.name is not None else ''
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Return the complete toc.html: the ``epub/toc.html`` resource
        template filled with html_part(). """
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
319
320
def used_chars(element):
    """ Return the set of characters found in the text and tail of
    ``element`` and of all its descendants. """
    found = set(element.text or '')
    found.update(element.tail or '')
    for child in element:
        found |= used_chars(child)
    return found
327
328
def chop(main_text):
    """ Divide the main content of the XML file into chunks.

    A generator. Every yielded value is the same ``<utwor><master>``
    container, refilled between yields, so a chunk must be consumed before
    the generator is advanced.

    A new chunk starts at every ``naglowek_czesc`` and, unless the previous
    element was a ``naglowek_czesc``, at chapter-level headers. Those are
    normally ``naglowek_rozdzial``, ``naglowek_akt`` and ``srodtytul``; in
    dramas that have scenes but no acts, ``naglowek_scena`` is used instead.
    """

    # one container, reused for every chunk
    part_xml = etree.Element('utwor')
    main_xml_part = etree.SubElement(part_xml, 'master')

    # workaround for epubs of dramas without acts: split on scenes there
    tags = [one_part.tag for one_part in main_text]
    scenes_without_acts = ('naglowek_scena' in tags
                           and 'naglowek_akt' not in tags)
    if scenes_without_acts:
        chunk_starters = ("naglowek_scena",)
    else:
        chunk_starters = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    last_node_part = False
    for one_part in main_text:
        name = one_part.tag
        if name == 'naglowek_czesc':
            yield part_xml
            last_node_part = True
            main_xml_part[:] = [deepcopy(one_part)]
        elif not last_node_part and name in chunk_starters:
            yield part_xml
            main_xml_part[:] = [deepcopy(one_part)]
        else:
            main_xml_part.append(deepcopy(one_part))
            last_node_part = False
    yield part_xml
374
375
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ Transform one chunk.

    Headers found in the chunk's first child are registered in a fresh TOC;
    sub-chapter and scene headers also get a ``sub`` attribute holding
    their TOC sub-number.

    With ``empty`` set, the static ``epub/emptyChunk.html`` resource is
    used as the output and no characters are reported. Otherwise
    annotations are moved into ``annotations``, stanzas are versified and
    the chunk is rendered through ``epub/xsltScheme.xsl``.

    ``_empty_html_static`` is an internal cache and should not be passed by
    callers: the mutable default is shared across calls on purpose, so the
    empty-chunk template is read from disk only once.

    Returns a tuple (HTML string, TOC object, set of used characters).
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # Close the template file deterministically instead of leaving
            # the handle to the garbage collector.
            with open(get_resource('epub/emptyChunk.html')) as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
405
406
def transform(wldoc, verbose=False,
              style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False):
    """ produces an EPUB file

    wldoc: the source document; it is deep-copied, the original is not
        modified
    verbose: let the font optimizer write to the process' own output
        instead of discarding it
    style: path of a CSS file to embed (defaults to the epub/style.css
        resource)
    html_toc: also generate an HTML table of contents (toc.html)
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: hyphenate the text using the dictionary chosen for the
        document's language

    Returns an OutputFile created from the name of the temporary .epub
    file. Raises subprocess.CalledProcessError when the font optimizer
    fails.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children

        Returns (toc, next chunk number, used characters, remaining sample
        budget).
        """

        replace_characters(wldoc.edoc.getroot())

        if hyphenate:
            hyphenator = set_hyph_language(wldoc.edoc.getroot())
            hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
            chars = used_chars(html_tree.getroot())
            epub_zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                with open(get_resource('epub/emptyChunk.html')) as f:
                    html_string = f.read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            epub_zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # sample budget used up: emit placeholder chunks
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                epub_zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    # work on a private copy so the caller's document stays untouched
    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    # named epub_zip rather than zip so the builtin is not shadowed
    epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(epub_zip)

    # write static elements
    # the mimetype entry is written first and stored uncompressed
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    epub_zip.writestr(mime, 'application/epub+zip')
    epub_zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    epub_zip.write(get_resource('res/wl-logo-small.png'),
                   os.path.join('OPS', 'logo_wolnelektury.png'))
    epub_zip.write(get_resource('res/jedenprocent.png'),
                   os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    epub_zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        epub_zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        epub_zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    # filled in by transform_chunk (through the transform_file closure)
    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        epub_zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html')) as f:
        html_string = f.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    epub_zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
    chars.update(used_chars(html_tree.getroot()))
    epub_zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # strip fonts: embed only the glyphs for characters actually used
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        try:
            # subset.pl is run from within the font-optimizer directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call)
                else:
                    # Discard the output via os.devnull; check_call never
                    # reads from a PIPE, so a chatty child could block
                    # forever on a full pipe buffer.
                    with open(os.devnull, 'w') as devnull:
                        subprocess.check_call(optimizer_call, stdout=devnull, stderr=devnull)
                epub_zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # clean up and restore the working directory even on failure
            rmtree(tmpdir)
            if cwd is not None:
                os.chdir(cwd)
    epub_zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # The title is plain text but set_inner_xml parses its argument as XML,
    # so escape it; otherwise a title containing '&' or '<' fails to parse.
    set_inner_xml(toc_file[1], ''.join(('<text>', escape(title), '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        epub_zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    epub_zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    epub_zip.close()
    # ZipFile does not close a file object it was handed, so release the
    # temporary file's handle here; delete=False keeps the file on disk.
    output_file.close()

    return OutputFile.from_filename(output_file.name)