3b405f820d59ce7bf557b46d44e6bf0da4d7ad48
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from lxml import etree
15 import zipfile
16 from tempfile import mkdtemp, NamedTemporaryFile
17 from shutil import rmtree
18
19 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
20 from librarian.cover import DefaultEbookCover
21
22 from librarian import functions, get_resource
23
24 from librarian.hyphenator import Hyphenator
25
26 functions.reg_person_name()
27 functions.reg_lang_code_3to2()
28
29
def set_hyph_language(source_tree):
    """ Builds a Hyphenator for the language declared in the document.

    The language is read from the first text node found under a
    ``dc:language`` element of ``source_tree``. It is looked up in the
    ISO-639-2 resource table to obtain the short code used in the names of
    the hyphenation dictionaries.

    Returns a Hyphenator instance, or None when the document declares no
    language or the Hyphenator for that language cannot be created.
    """
    def get_short_lng_code(text):
        """ Maps a language code to the third column of the ISO table.

        Falls back to the input code when there is no match or the matched
        row has an empty (or missing) third column.
        """
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                # Guard against short/malformed rows instead of raising.
                if fields[0] == text and len(fields) > 2:
                    result = fields[2]
        return result or text

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No language declared: hyphenation is simply not available.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Best effort: a failure to build the Hyphenator (presumably a
        # missing or unreadable dictionary -- confirm) disables hyphenation
        # rather than aborting the conversion. Unlike a bare ``except:``
        # this does not swallow KeyboardInterrupt / SystemExit.
        return None
51
52
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Inserts soft hyphens and binds one-letter words to the next word.

    Processes every text node found under the second child of the root
    ``utwor`` element. When ``hyph`` is not None, each token of the text is
    passed through ``hyph.inserted`` with a soft hyphen (U+00AD) as the
    separator. Afterwards, whitespace following a single word character that
    itself follows whitespace is replaced with a no-break space (U+00A0).
    The tree is modified in place.
    """
    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    text_nodes = etree.XPath('/utwor/*[2]//text()')(source_tree)
    for t in text_nodes:
        parent = t.getparent()
        if hyph is None:
            newt = t
        else:
            newt = ''.join(
                hyph.inserted(w, u'\u00AD') for w in tokenizer.findall(t))
        newt = re.sub(r'(?<=\s\w)\s+', u'\u00A0', newt)
        # A text node is either its parent's text or its parent's tail.
        if t.is_text:
            parent.text = newt
        elif t.is_tail:
            parent.tail = newt
69
70
def inner_xml(node):
    """ Serializes the content of a node: its text followed by its children.

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    pieces = ['' if node.text is None else node.text]
    pieces.extend(etree.tostring(child) for child in node)
    return ''.join(pieces)
80
81
def set_inner_xml(node, text):
    """ Replaces a node's text and children with content parsed from a string

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    # Wrap the fragment in a throwaway element so it parses as one document.
    wrapper = etree.fromstring('<x>%s</x>' % text)
    new_children = wrapper[:]
    node.text = wrapper.text
    node[:] = new_children
94
95
def node_name(node):
    """ Returns the plain-text name of a node

    Works on a copy of the node: the content of annotation and theme
    elements (pe, pa, pt, pr, motyw) is dropped, then all remaining markup
    is stripped and the resulting text is returned.

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    clone = deepcopy(node)

    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for found in clone.findall('.//%s' % tag):
            # clear() also drops the tail, which belongs to the outer text.
            kept_tail = found.tail
            found.clear()
            found.tail = kept_tail
    etree.strip_tags(clone, '*')
    return clone.text
112
113
def xslt(xml, sheet):
    """ Applies the XSLT stylesheet stored in file `sheet` to `xml`.

    `xml` may be an element (wrapped in an ElementTree first) or any object
    providing an `xslt` method. Returns the result of the transformation.
    """
    document = etree.ElementTree(xml) if isinstance(xml, etree._Element) else xml
    with open(sheet) as stylesheet_file:
        stylesheet = etree.parse(stylesheet_file)
        return document.xslt(stylesheet)
119
120
def replace_characters(node):
    """ Recursively applies typographic replacements to a tree, in place.

    Elements tagged 'uwaga' or 'extra' are emptied (their tail is kept).
    In all text and tail strings the BOM character is removed and ASCII
    dashes, double commas and quotes are replaced with typographic ones.
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def replace_chars(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        kept_tail = node.tail
        node.clear()
        node.tail = kept_tail
    node.text = replace_chars(node.text)
    node.tail = replace_chars(node.tail)
    for child in node:
        replace_characters(child)
139
140
def find_annotations(annotations, source, part_no):
    """ Collects annotations from a tree, replacing them with their numbers.

    Every descendant of `source` tagged pe, pa, pt or pr is copied into
    `annotations` with 'number' (1-based position in `annotations`) and
    'part' (`part_no`) attributes; the original element is emptied and its
    text set to that number. Children of 'extra' and 'uwaga' elements are
    not visited.
    """
    for node in source:
        if node.tag in ('pe', 'pa', 'pt', 'pr'):
            number = str(len(annotations) + 1)
            note = deepcopy(node)
            note.set('number', number)
            note.set('part', str(part_no))
            note.tail = ''
            annotations.append(note)
            # Leave only a numbered placeholder in the main text.
            kept_tail = node.tail
            node.clear()
            node.tail = kept_tail
            node.text = number
        if node.tag in ('extra', 'uwaga'):
            continue
        find_annotations(annotations, node, part_no)
156
157
class Stanza(object):
    """
    Splits a stanza into verse elements at '/' verse endings.

    Only slashes found directly in the stanza (its text and the tails of its
    children) count. Slashes inside subelements are left alone, and the
    subelements themselves are placed inside verse elements.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # Verses built so far, in document order.
        self.verses = []
        # The verse currently receiving content (None until first needed).
        self.open_verse = None

    def versify(self):
        """ Rebuilds the stanza in place as a sequence of verse elements. """
        self.push_text(self.stanza.text)
        for child in self.stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() resets the tail as well, so keep it aside.
        kept_tail = self.stanza.tail
        self.stanza.clear()
        self.stanza.tail = kept_tail
        self.stanza.extend(self.verses)

    def open_normal_verse(self):
        """ Starts a new 'wers_normalny' verse and makes it the open one. """
        fresh = self.stanza.makeelement("wers_normalny")
        self.open_verse = fresh
        self.verses.append(fresh)

    def get_open_verse(self):
        """ Returns the open verse, starting a normal one if there is none. """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Appends text to the verses, opening a new verse at each '/'. """
        if not text:
            return
        pieces = re.split(r"/\s*\n", text)
        for position, piece in enumerate(pieces):
            if position > 0:
                self.open_normal_verse()
            target = self.get_open_verse()
            if len(target) == 0:
                target.text = (target.text or "") + piece
            else:
                # Text after the last child belongs to that child's tail.
                last_child = target[-1]
                last_child.tail = (last_child.tail or "") + piece

    def push_elem(self, elem):
        """ Adds a copy of a stanza child.

        Elements whose tag starts with 'wers' become the open verse
        themselves; anything else goes into the currently open verse.
        """
        is_verse = elem.tag.startswith("wers")
        clone = deepcopy(elem)
        # The tail is handled separately by push_text().
        clone.tail = None
        if is_verse:
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
218
219
def replace_by_verse(tree):
    """ Converts '/' verse endings into verse elements in every stanza of the tree """

    stanza_path = './/' + WLNS('strofa')
    for stanza in tree.findall(stanza_path):
        Stanza(stanza).versify()
226
227
def add_to_manifest(manifest, partno):
    """ Appends an XHTML item for part number `partno` to the manifest element """

    item_id = 'part%d' % partno
    attributes = {
        'id': item_id,
        'href': item_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
237
238
def add_to_spine(spine, partno):
    """ Appends an itemref for part number `partno` to the spine element """

    attributes = {'idref': 'part%d' % partno}
    spine.append(spine.makeelement(OPFNS('itemref'), attrib=attributes))
244
245
class TOC(object):
    """ A node of the table-of-contents tree.

    Each node has a display name, the href of the part file it points to,
    an optional sub-section number (rendered as a '#subN' fragment) and a
    list of child nodes.
    """

    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Adds an entry to the tree.

        With level > 0 the entry is delegated to the last child (one level
        deeper), if there is any child; otherwise it becomes a direct child,
        inserted at `index` when given or appended.

        Returns the sub-section number assigned to the new entry when
        `is_part` is false, None otherwise.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)

        t = TOC(name)
        t.part_href = part_href
        if index is not None:
            self.children.insert(index, t)
        else:
            self.children.append(t)
        if not is_part:
            # Computed after the entry has been added to the list.
            t.sub_number = len(self.children) + 1
            return t.sub_number

    def append(self, toc):
        """ Appends a whole TOC node as a child. """
        self.children.append(toc)

    def extend(self, toc):
        """ Appends all children of another TOC node as own children. """
        self.children.extend(toc.children)

    def depth(self):
        """ Returns the number of levels below this node (0 for a leaf). """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Returns the link target: part href plus '#subN' if numbered. """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Writes the children as NCX navPoint elements under `nav_map`.

        `counter` is the number given to the first navPoint; numbering
        continues depth-first. Returns the next unused number.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            # Labels are single-line; a missing name leaves the text unset.
            if child.name is not None:
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = None
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Returns the children rendered as nested, indented HTML links.

        Entry names are XML-escaped so that characters such as '&' or '<'
        in a title cannot break the generated markup; an entry without a
        name gets an empty label.
        """
        # Local import: the only place in this class that needs escaping.
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            label = escape(child.name) if child.name is not None else ''
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Returns the complete TOC page built from the toc.html template. """
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
321
322
def used_chars(element):
    """ Returns the set of characters used in the text of an element tree

    Covers the element's own text and tail and, recursively, those of all
    its descendants.
    """
    own_text = (element.text or '') + (element.tail or '')
    found = set(own_text)
    for child in element:
        found.update(used_chars(child))
    return found
329
330
def chop(main_text):
    """ Divides the main content of the XML file into chunks

    Yields an 'utwor' element holding a single 'master' child filled with
    copies of the elements of the current chunk. The very same object is
    yielded every time and refilled afterwards, so each chunk has to be
    consumed before advancing the generator.

    A new chunk starts at every 'naglowek_czesc' and at every splitting
    header, unless that header directly follows a 'naglowek_czesc'.
    """

    # A single reusable container for all chunks.
    part_xml = etree.Element('utwor')
    main_xml_part = etree.SubElement(part_xml, 'master')

    # Workaround for dramas without acts: when there are scenes but no
    # acts, the text is split on scene headers instead of the usual ones.
    tags = [part.tag for part in main_text]
    has_scenes = 'naglowek_scena' in tags
    has_acts = 'naglowek_akt' in tags
    if has_scenes and not has_acts:
        splitting_tags = ("naglowek_scena",)
    else:
        splitting_tags = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    # True right after a 'naglowek_czesc' has opened a chunk.
    last_node_part = False

    for one_part in main_text:
        name = one_part.tag
        if name == 'naglowek_czesc':
            yield part_xml
            last_node_part = True
            main_xml_part[:] = [deepcopy(one_part)]
        elif not last_node_part and name in splitting_tags:
            yield part_xml
            main_xml_part[:] = [deepcopy(one_part)]
        else:
            main_xml_part.append(deepcopy(one_part))
            last_node_part = False
    yield part_xml
376
377
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ Transforms one chunk; returns a HTML string, a TOC object and a set of used characters

    chunk_xml: the chunk container; headers are read from its first child
    chunk_no: number of the chunk, used to build 'partN.html' hrefs
    annotations: element collecting annotations found in the chunk
    empty: if true, the content of the 'empty chunk' template is returned
        instead of transforming the chunk (the TOC is still built)
    _empty_html_static: internal; the mutable default is used on purpose as
        a cache of the template shared between calls
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # Stored on the element itself (presumably read by the XSLT
            # sheet to emit the matching anchor -- confirm).
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # Read the template once and close the file right away.
            with open(get_resource('epub/emptyChunk.html')) as template:
                _empty_html_static.append(template.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
407
408
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path=''):
    """ produces a EPUB file

    wldoc: the source document; it is deep-copied and not modified
    verbose: if true, the output of the font optimizer is not suppressed
    style: path to a CSS file to use instead of the default style sheet
    html_toc: if true, a HTML table of contents page is added
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: if true, soft hyphens are inserted into the text
    ilustr_path: directory with illustrations to include; when empty,
        no illustrations are added

    Returns an OutputFile created from the name of the generated file.
    """
    # Local import: only needed to escape the title written to the NCX file.
    from xml.sax.saxutils import escape

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children

        Returns a tuple: the TOC of the file, the next free chunk number,
        the set of characters used and the remaining sample counter.
        """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'))
            chars = used_chars(html_tree.getroot())
            zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                with open(get_resource('epub/emptyChunk.html')) as empty_file:
                    html_string = empty_file.read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    # once the sample quota is used up, emit empty chunks
                    if sample <= 0:
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    # write static elements
    # The uncompressed 'mimetype' entry has to be the first one in the
    # archive, so it is written before anything else.
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )

    # Illustrations are optional: the default empty path means "none"
    # (os.listdir('') would raise).
    if ilustr_path:
        for filename in os.listdir(ilustr_path):
            zip.write(os.path.join(ilustr_path, filename), os.path.join('OPS', filename))

    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html')) as support_file:
        html_string = support_file.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'))
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # strip fonts: embed only the glyphs of characters actually used
        tmpdir = mkdtemp('-librarian-epub')
        # The optimizer script is run from its own directory. Passing cwd=
        # to the child process avoids changing (and having to restore) the
        # working directory of the whole process.
        optimizer_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer')
        try:
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call, cwd=optimizer_dir)
                else:
                    subprocess.check_call(optimizer_call, cwd=optimizer_dir,
                                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # Remove the temporary directory even if the optimizer failed.
            rmtree(tmpdir)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # The title is plain text spliced into markup that is parsed again, so
    # it has to be escaped ('&' or '<' in a title would break the parse).
    set_inner_xml(toc_file[1], ''.join(('<text>', escape(title), '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()
    # ZipFile does not close a file object it was handed, so do it here.
    output_file.close()

    return OutputFile.from_filename(output_file.name)