images in PDF
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from lxml import etree
15 import zipfile
16 from tempfile import mkdtemp, NamedTemporaryFile
17 from shutil import rmtree
18
19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
20 from librarian.cover import DefaultEbookCover
21
22 from librarian import functions, get_resource
23
24 from librarian.hyphenator import Hyphenator
25
# Module-level side effect: both registration hooks run once, at import time.
# NOTE(review): presumably these expose person-name formatting and 3-to-2
# letter language-code conversion to the stylesheets used below -- confirm
# against librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
28
29
def set_hyph_language(source_tree):
    """ Builds a Hyphenator for the language declared in the document.

    The first text node found under ``dc:language`` is mapped through the
    ``res/ISO-639-2_8859-1.txt`` table and the result is used to pick a
    dictionary file ``res/hyph-dictionaries/hyph_<code>.dic``.

    Returns the Hyphenator, or None when the document declares no language
    or the Hyphenator could not be created (hyphenation is best-effort).
    """
    def get_short_lng_code(text):
        """ Maps a language code through the '|'-separated code table.

        A row matches when its first field equals ``text``; the third field
        of the last matching row is returned. When nothing matches, or the
        matching field is empty, ``text`` itself is returned.
        """
        result = ''
        # normalise the argument to a plain string before comparing
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                # skip malformed rows instead of failing on fields[2]
                if len(fields) > 2 and fields[0] == text:
                    result = fields[2]
        return result or text

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # no language declared: nothing to hyphenate with
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Deliberately best-effort: a missing or unreadable dictionary only
        # disables hyphenation. Unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
51
52
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Hyphenates text nodes and glues one-letter words to their successor.

    Processes every text node below the second child of ``/utwor``.

    hyph: an object with ``inserted(word, hyphen)``, or None to skip
        hyphenation. When given, each token gets soft hyphens (U+00AD)
        inserted by it.

    In both cases whitespace that follows a single word character, itself
    preceded by whitespace, is replaced with a no-break space (U+00A0).
    The tree is modified in place; nothing is returned.
    """
    # compiled once, outside the per-node loop
    token_re = re.compile(r'\w+|[^\w]', re.UNICODE)
    orphan_space_re = re.compile(r'(?<=\s\w)\s+')

    texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
    for t in texts:
        parent = t.getparent()
        if hyph is not None:
            newt = ''.join(hyph.inserted(w, u'\u00AD')
                           for w in token_re.findall(t))
        else:
            newt = t
        newt = orphan_space_re.sub(u'\u00A0', newt)
        # write the result back to whichever slot the text came from
        if t.is_text:
            parent.text = newt
        elif t.is_tail:
            parent.tail = newt
69
70
def inner_xml(node):
    """ serializes node's content (leading text and child elements) as a string

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    fragments = [node.text or '']
    fragments.extend(etree.tostring(child) for child in node)
    return ''.join(fragments)
80
81
def set_inner_xml(node, text):
    """ replaces node's text and children with content parsed from a string

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    # wrap the fragment in a throw-away root so that it parses as one document
    wrapper = etree.fromstring('<x>%s</x>' % text)
    new_children = wrapper[:]
    node.text = wrapper.text
    node[:] = new_children
94
95
def node_name(node):
    """ Extracts a node's name: its text with all markup removed

    Works on a copy, so the node itself is left untouched. Content of
    annotation and theme elements (pe, pa, pt, pr, motyw) is dropped.

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    clone = deepcopy(node)

    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for found in clone.findall('.//%s' % tag):
            # empty the element but keep the text that follows it
            kept_tail = found.tail
            found.clear()
            found.tail = kept_tail
    etree.strip_tags(clone, '*')
    return clone.text
112
113
def xslt(xml, sheet):
    """ Applies the XSLT stylesheet stored in file `sheet` to `xml`.

    `xml` may be an element (it is wrapped in an ElementTree first) or any
    object providing an `xslt()` method. Returns the transformation result.
    """
    document = etree.ElementTree(xml) if isinstance(xml, etree._Element) else xml
    with open(sheet) as xsltf:
        stylesheet = etree.parse(xsltf)
        return document.xslt(stylesheet)
119
120
def replace_characters(node):
    """ Applies typographic replacements to a node and all its descendants.

    Elements tagged 'uwaga' or 'extra' are emptied first (their tail text is
    kept). The tree is modified in place.
    """
    # applied in this order, so '---' is handled before '--'
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def replace_chars(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        # clear() would also drop the tail, so carry it over by hand
        saved_tail = node.tail
        node.clear()
        node.tail = saved_tail
    node.text = replace_chars(node.text)
    node.tail = replace_chars(node.tail)
    for child in node:
        replace_characters(child)
139
140
def find_annotations(annotations, source, part_no):
    """ Collects annotations (pe, pa, pt, pr) found below `source`.

    A copy of every annotation is appended to `annotations`, carrying its
    running 'number' and the 'part' it comes from. The original element is
    reduced to just that number. Content of 'extra' and 'uwaga' elements is
    not searched.
    """
    for child in source:
        if child.tag in ('pe', 'pa', 'pt', 'pr'):
            number = str(len(annotations) + 1)

            note = deepcopy(child)
            note.set('number', number)
            note.set('part', str(part_no))
            note.tail = ''
            annotations.append(note)

            # turn the original into a bare reference, keeping following text
            kept_tail = child.tail
            child.clear()
            child.tail = kept_tail
            child.text = number
        if child.tag in ('extra', 'uwaga'):
            continue
        find_annotations(annotations, child, part_no)
156
157
class Stanza(object):
    """
    Turns "/"-terminated lines of a stanza into verse elements.

    Only slashes placed directly in the stanza's own text count. Slashes
    inside subelements are left alone, and the subelements themselves end up
    inside verse elements.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verse elements built so far, in document order
        self.verses = []
        # verse currently receiving content; None until the first one is opened
        self.open_verse = None

    def versify(self):
        """ Rebuilds the stanza in place as a sequence of verse elements. """
        stanza = self.stanza
        self.push_text(stanza.text)
        for child in stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() drops the tail as well, so restore it afterwards
        kept_tail = stanza.tail
        stanza.clear()
        stanza.tail = kept_tail
        stanza.extend(self.verses)

    def open_normal_verse(self):
        """ Starts a new 'wers_normalny' verse and makes it the open one. """
        verse = self.stanza.makeelement("wers_normalny")
        self.verses.append(verse)
        self.open_verse = verse

    def get_open_verse(self):
        """ Returns the open verse, starting a normal one if there is none. """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Adds text to the stanza, starting a new verse at every line end. """
        if not text:
            return
        pieces = re.split(r"/\s*\n", text)
        for position, piece in enumerate(pieces):
            if position > 0:
                self.open_normal_verse()
            verse = self.get_open_verse()
            if len(verse) == 0:
                verse.text = (verse.text or "") + piece
            else:
                # text after the verse's last child belongs to that child's tail
                last_child = verse[-1]
                last_child.tail = (last_child.tail or "") + piece

    def push_elem(self, elem):
        """ Adds a copy of a subelement of the stanza.

        Elements whose tag starts with "wers" become verses themselves (and
        the open verse); anything else is appended to the open verse.
        """
        is_verse = elem.tag.startswith("wers")
        clone = deepcopy(elem)
        clone.tail = None
        if is_verse:
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
218
219
def replace_by_verse(tree):
    """ Versifies every stanza below `tree`, splitting text at '/' line ends """

    for stanza in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza).versify()
226
227
def add_to_manifest(manifest, partno):
    """ Appends an item for part number `partno` to the content.opf manifest """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
237
238
def add_to_spine(spine, partno):
    """ Appends an itemref for part number `partno` to the content.opf spine """

    attributes = {'idref': 'part%d' % partno}
    spine.append(spine.makeelement(OPFNS('itemref'), attrib=attributes))
244
245
class TOC(object):
    """ A table-of-contents node: a name, a target and nested child entries.

    The root node is only a container; its own name is never rendered by
    write_to_xml() or html_part(), only its descendants are.
    """
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        # set only for entries pointing at a '#sub<N>' anchor inside a part
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Adds an entry `level` steps below this node.

        For level > 0 the entry is delegated to the last child, if there is
        one; otherwise it becomes a direct child, inserted at `index` or
        appended. `index` may only be given together with level 0.

        Returns the entry's sub number when `is_part` is false, else None.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ Adds a whole TOC node as the last child. """
        self.children.append(toc)

    def extend(self, toc):
        """ Adopts all children of another TOC node. """
        self.children.extend(toc.children)

    def depth(self):
        """ Returns the number of nesting levels below this node. """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Returns the entry's target, with a '#sub<N>' anchor if it has one. """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Writes the descendants as nested NCX navPoint elements.

        `counter` numbers the navPoints (id and playOrder) in document order.
        Returns the first unused counter value.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                # labels are single-line
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Renders the descendants as indented XHTML links, one div each.

        Names are plain text, so they are XML-escaped here; a missing name
        is rendered as an empty label.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            label = escape(child.name) if child.name is not None else ''
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Returns the TOC page: the epub/toc.html template filled with links. """
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
321
322
def used_chars(element):
    """ Collects the set of characters used in an ETree Element's texts

    Covers text and tail of the element itself and of all its descendants.
    """
    found = set((element.text or '') + (element.tail or ''))
    for child in element:
        found |= used_chars(child)
    return found
329
330
def chop(main_text):
    """ splits the main content of the XML file into chunks

    Generator. Every yielded value is the same <utwor><master/></utwor>
    container, refilled for each chunk, so a chunk has to be consumed
    before the next one is requested. A new chunk starts at every
    'naglowek_czesc', and at every chunk-level header that does not come
    directly after a 'naglowek_czesc'. The container is also yielded
    (possibly empty) right before the first such header.
    """

    # one reusable container for all chunks
    part_xml = etree.Element('utwor')
    master = etree.SubElement(part_xml, 'master')

    # workaround for dramas that have scenes but no acts:
    # in such texts scenes, and only scenes, act as chunk-level headers
    tags = [node.tag for node in main_text]
    scenes_without_acts = 'naglowek_scena' in tags and 'naglowek_akt' not in tags
    if scenes_without_acts:
        chunk_headers = ("naglowek_scena",)
    else:
        chunk_headers = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    # True right after a 'naglowek_czesc', so that a header directly
    # following it stays in the same chunk
    last_node_part = False

    for node in main_text:
        tag = node.tag
        if tag == 'naglowek_czesc':
            yield part_xml
            last_node_part = True
            master[:] = [deepcopy(node)]
        elif not last_node_part and tag in chunk_headers:
            yield part_xml
            master[:] = [deepcopy(node)]
        else:
            master.append(deepcopy(node))
            last_node_part = False
    yield part_xml
376
377
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters

    chunk_xml: chunk container as produced by chop(); headers are read from
        its first child
    chunk_no: number of the chunk, used in the 'part<N>.html' targets
    annotations: collection that annotations found in the chunk are added to
    empty: if true, the chunk is not transformed and the content of
        epub/emptyChunk.html is returned instead (with no used characters)
    _empty_html_static: internal; the shared default list deliberately
        caches the empty chunk's HTML between calls
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # remember the anchor number on the element itself
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # read once; the file is closed right away
            with open(get_resource('epub/emptyChunk.html')) as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
407
408
def transform(wldoc, verbose=False,
              style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False):
    """ produces a EPUB file

    wldoc: the source document; it is deep-copied first, the original is
        not modified
    verbose: if true, the font optimizer's output is not captured
    style: path to a CSS file to use instead of the default epub/style.css
    html_toc: if true, a HTML table of contents page is added
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: if true, texts are hyphenated with soft hyphens

    Returns an OutputFile created from the name of the temporary EPUB file.
    """
    from xml.sax.saxutils import escape

    def xhtml_bytes(tree):
        """ serializes a tree as pretty-printed UTF-8 XHTML 1.1 """
        return etree.tostring(
            tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )

    def read_resource(path):
        """ returns the content of a resource file, closing it afterwards """
        with open(get_resource(path)) as f:
            return f.read()

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children

        Returns the file's TOC, the next free chunk number, the set of
        characters used and the remaining sample budget.
        """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
            chars = used_chars(html_tree.getroot())
            epub_zip.writestr('OPS/title.html', xhtml_bytes(html_tree))
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                html_string = read_resource('epub/emptyChunk.html')
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = xhtml_bytes(html_tree)
            epub_zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # sample budget used up: emit placeholders from now on
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                epub_zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    # work on a private copy; the caller's document stays untouched
    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    epub_zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(epub_zip)

    # write static elements
    # the mimetype entry is written first and stored uncompressed
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    epub_zip.writestr(mime, 'application/epub+zip')
    epub_zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    epub_zip.write(get_resource('res/wl-logo-small.png'),
                   os.path.join('OPS', 'logo_wolnelektury.png'))
    epub_zip.write(get_resource('res/jedenprocent.png'),
                   os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    epub_zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        epub_zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        epub_zip.writestr('OPS/cover.html', xhtml_bytes(cover_tree))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        epub_zip.writestr('OPS/annotations.html', xhtml_bytes(html_tree))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = read_resource('epub/support.html')
    chars.update(used_chars(etree.fromstring(html_string)))
    epub_zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
    chars.update(used_chars(html_tree.getroot()))
    epub_zip.writestr('OPS/last.html', xhtml_bytes(html_tree))

    if not flags or 'without-fonts' not in flags:
        # strip fonts: embed only the glyphs for characters actually used
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        try:
            # the optimizer script is run from its own directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call)
                else:
                    subprocess.check_call(optimizer_call, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                epub_zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # clean up and restore the working directory even if the
            # optimizer fails
            rmtree(tmpdir)
            if cwd is not None:
                os.chdir(cwd)
    epub_zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # the title is plain text, so escape it before parsing it as XML
    set_inner_xml(toc_file[1], ''.join(('<text>', escape(title), '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        epub_zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    epub_zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                      xml_declaration=True, encoding="utf-8"))
    epub_zip.close()
    # ZipFile does not close a file object passed to it; the temporary file
    # was created with delete=False, so it stays on disk
    output_file.close()

    return OutputFile.from_filename(output_file.name)