fix isbn for txt
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from mimetypes import guess_type
15
16 from lxml import etree
17 import zipfile
18 from tempfile import mkdtemp, NamedTemporaryFile
19 from shutil import rmtree
20
21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
22 from librarian.cover import DefaultEbookCover
23
24 from librarian import functions, get_resource
25
26 from librarian.hyphenator import Hyphenator
27
# Module-level side effect: both calls run once, when this module is imported.
# NOTE(review): presumably they register the person-name and 3-to-2 letter
# language-code helpers as extension functions for the XSLT stylesheets used
# below -- confirm against librarian.functions.
functions.reg_person_name()
functions.reg_lang_code_3to2()
30
31
def set_hyph_language(source_tree):
    """ Builds a Hyphenator for the language declared in the document.

    The first text node found under a ``dc:language`` element is taken as
    the language code. The code is looked up in the first column of
    ``res/ISO-639-2_8859-1.txt``; when a row matches, the value of its third
    column is used instead, otherwise the code is used unchanged. The
    resulting code selects ``res/hyph-dictionaries/hyph_<code>.dic``.

    Returns the Hyphenator, or None when the document declares no language
    or the Hyphenator cannot be created (hyphenation is best-effort).
    """
    def get_short_lng_code(text):
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                # Rows are '|'-separated; ignore malformed rows that are too
                # short to carry the third column. The last matching row
                # wins, as the whole file is always scanned.
                if len(fields) > 2 and fields[0] == text:
                    result = fields[2]
        # Fall back to the original code when no row matched or the matching
        # row has an empty third column.
        return result or text

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No dc:language in the metadata: nothing to hyphenate with.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Deliberately best-effort (e.g. no dictionary for this language),
        # but do not swallow SystemExit / KeyboardInterrupt.
        return None
53
54
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Rewrites text nodes in place: soft hyphens and non-breaking spaces.

    Works on every text node below the second child of the root ``utwor``
    element. When ``hyph`` is not None, each word and each non-word
    character is passed through ``hyph.inserted(..., u'\\u00AD')`` and the
    results are concatenated. Afterwards any whitespace run that follows a
    single word character preceded by whitespace is replaced with U+00A0.
    """
    token_re = re.compile(r'\w+|[^\w]', re.UNICODE)
    for node_text in etree.XPath('/utwor/*[2]//text()')(source_tree):
        owner = node_text.getparent()
        if hyph is None:
            fixed = node_text
        else:
            fixed = ''.join(
                [hyph.inserted(token, u'\u00AD')
                 for token in token_re.findall(node_text)])
        fixed = re.sub(r'(?<=\s\w)\s+', u'\u00A0', fixed)
        # Write the result back to wherever this text node came from.
        if node_text.is_text:
            owner.text = fixed
        elif node_text.is_tail:
            owner.tail = fixed
71
72
def inner_xml(node):
    """ Serializes the content of a node: its text followed by its children.

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    pieces = [node.text or '']
    for child in node:
        # each child is serialized by etree.tostring()
        pieces.append(etree.tostring(child))
    return ''.join(pieces)
82
83
def set_inner_xml(node, text):
    """ Replaces the text and the children of a node with parsed markup.

    ``text`` is wrapped in a temporary ``<x>`` element and parsed, so it
    has to be a well-formed XML fragment.

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    wrapper = etree.fromstring('<x>%s</x>' % text)
    node.text = wrapper.text
    # slice assignment swaps all children of node for those of the wrapper
    node[:] = wrapper[:]
96
97
def node_name(node):
    """ Returns the plain-text name of a node.

    Works on a copy, so the node itself is left untouched. The content of
    any 'pe', 'pa', 'pt', 'pr' and 'motyw' descendants is dropped (their
    tails are kept), then all remaining markup is stripped.

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    scratch = deepcopy(node)

    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for unwanted in scratch.findall('.//%s' % tag):
            # clear() also resets the tail, so put it back afterwards
            kept_tail = unwanted.tail
            unwanted.clear()
            unwanted.tail = kept_tail
    etree.strip_tags(scratch, '*')
    return scratch.text
114
115
def xslt(xml, sheet, **kwargs):
    """ Applies the XSLT stylesheet stored in file `sheet` to `xml`.

    `xml` may be an element (it is wrapped in an ElementTree first) or a
    tree. Keyword arguments are handed to the stylesheet as string
    parameters. Returns the result of the transformation.
    """
    source = xml
    if isinstance(source, etree._Element):
        source = etree.ElementTree(source)
    with open(sheet) as sheet_file:
        transform = etree.XSLT(etree.parse(sheet_file))
        string_params = {}
        for key, value in kwargs.iteritems():
            string_params[key] = transform.strparam(value)
        return transform(source, **string_params)
123
124
def replace_characters(node):
    """ Applies typographic substitutions to a node and all its descendants.

    Byte order marks are removed, '---' and '--' become em and en dashes,
    ',,' becomes a low opening quote, '"' a closing quote and "'" a right
    single quote. Elements tagged 'uwaga' or 'extra' are emptied first
    (only their tail is kept).
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def fix(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        # clear() also drops the tail, so keep it aside
        saved_tail = node.tail
        node.clear()
        node.tail = saved_tail
    node.text = fix(node.text)
    node.tail = fix(node.tail)
    for child in node:
        replace_characters(child)
143
144
def find_annotations(annotations, source, part_no):
    """ Moves annotations out of `source` into the `annotations` element.

    Every 'pe', 'pa', 'pt' or 'pr' element found below `source` is copied
    into `annotations` with 'number' (1-based, counted over everything
    collected so far) and 'part' attributes; the original element is
    emptied and left with just the number as its text. The content of
    'extra' and 'uwaga' elements is not searched.
    """
    annotation_tags = ('pe', 'pa', 'pt', 'pr')
    skipped_tags = ('extra', 'uwaga')
    for node in source:
        tag = node.tag
        if tag in annotation_tags:
            ordinal = str(len(annotations) + 1)
            note = deepcopy(node)
            note.set('number', ordinal)
            note.set('part', str(part_no))
            note.tail = ''
            annotations.append(note)
            # leave a numbered marker where the annotation used to be
            kept_tail = node.tail
            node.clear()
            node.tail = kept_tail
            node.text = ordinal
        if tag in skipped_tags:
            continue
        find_annotations(annotations, node, part_no)
160
161
class Stanza(object):
    """
    Splits a stanza into verse elements at '/' line endings.

    Only slashes found directly in the stanza (in its text or in the tails
    of its children) end a verse. Slashes inside subelements are left
    alone, and the subelements themselves land inside verse elements.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verse elements built so far, in document order
        self.verses = []
        # verse currently receiving content; None until one is needed
        self.open_verse = None

    def versify(self):
        """ Rebuilds the stanza so that it holds only verse elements. """
        stanza = self.stanza
        self.push_text(stanza.text)
        for child in stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() wipes the tail as well, so restore it
        saved_tail = stanza.tail
        stanza.clear()
        stanza.tail = saved_tail
        stanza.extend(self.verses)

    def open_normal_verse(self):
        """ Starts a fresh 'wers_normalny' verse and makes it current. """
        verse = self.stanza.makeelement("wers_normalny")
        self.open_verse = verse
        self.verses.append(verse)

    def get_open_verse(self):
        """ Returns the current verse, starting a normal one if needed. """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Appends text to the verses, opening a new one at each '/'. """
        if not text:
            return
        fragments = re.split(r"/\s*\n", text)
        for position, fragment in enumerate(fragments):
            if position > 0:
                # every fragment after the first follows a verse ending
                self.open_normal_verse()
            verse = self.get_open_verse()
            if len(verse) == 0:
                verse.text = (verse.text or "") + fragment
            else:
                last = verse[-1]
                last.tail = (last.tail or "") + fragment

    def push_elem(self, elem):
        """ Adds a copy of a child: as a verse of its own or into one. """
        is_verse = elem.tag.startswith("wers")
        clone = deepcopy(elem)
        clone.tail = None
        if is_verse:
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
222
223
def replace_by_verse(tree):
    """ Converts every stanza found in the tree into a sequence of verses.

    Verse boundaries are the '/' line endings handled by Stanza.versify().
    """

    stanza_path = './/' + WLNS('strofa')
    for stanza_elem in tree.findall(stanza_path):
        Stanza(stanza_elem).versify()
230
231
def add_to_manifest(manifest, partno):
    """ Appends an XHTML item for part number `partno` to the manifest.

    The item gets the id 'part<N>' and points at 'part<N>.html'.
    """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
241
242
def add_to_spine(spine, partno):
    """ Appends an itemref for part number `partno` to the spine.

    The itemref refers to the manifest item with the id 'part<N>'.
    """

    attributes = {'idref': 'part%d' % partno}
    itemref = spine.makeelement(OPFNS('itemref'), attrib=attributes)
    spine.append(itemref)
248
249
class TOC(object):
    """ A table-of-contents entry together with its sub-entries.

    The root object is only a container: its own name and href are not
    rendered by write_to_xml() or html_part(), just those of its
    descendants.
    """
    def __init__(self, name=None, part_href=None):
        # nested TOC entries, in reading order
        self.children = []
        # label of the entry (may be None)
        self.name = name
        # file the entry points to
        self.part_href = part_href
        # number used for the '#sub<N>' fragment; None for whole parts
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Adds an entry `level` levels below this one.

        While `level` is positive and there are children, the call is
        handed to the last child with the level decreased by one; otherwise
        the entry becomes a direct child, inserted at `index` or appended.
        `index` may only be used together with level 0.

        Returns the sub-number given to the entry when `is_part` is false,
        None otherwise.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                # Counted after the insertion, so numbering starts at 2.
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ Adds a whole TOC as the last child. """
        self.children.append(toc)

    def extend(self, toc):
        """ Adds all children of another TOC as children of this one. """
        self.children.extend(toc.children)

    def depth(self):
        """ Returns the number of levels below this entry (0 for a leaf). """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Returns the target of the entry, with '#sub<N>' if it has one. """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Writes all descendants as nested NCX navPoint elements.

        `counter` is the number given to the first navPoint; the number to
        use for the next navPoint is returned.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                # labels are kept on a single line
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Returns the descendants as indented HTML links, one per line.

        Entry names are plain text, so they are escaped before being put
        into the markup; an entry without a name gets an empty label.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            if child.name is not None:
                label = escape(child.name)
            else:
                label = ''
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Returns the 'epub/toc.html' template filled with the entries. """
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
325
326
def used_chars(element):
    """ Returns the set of characters used in the text of an ETree Element.

    Covers the text and the tail of the element and of all descendants.
    """
    found = set((element.text or '') + (element.tail or ''))
    for child in element:
        found.update(used_chars(child))
    return found
333
334
def chop(main_text):
    """ Generator dividing the main content of the XML file into chunks.

    Each chunk is an 'utwor' element with a single 'master' child holding
    copies of consecutive nodes of `main_text`. The same 'utwor' object is
    yielded every time and refilled afterwards, so a chunk has to be used
    before the next one is requested.
    """

    # one container, reused for every chunk
    part_xml = etree.Element('utwor')
    master = etree.SubElement(part_xml, 'master')

    # Workaround for dramas without acts: if there are scene headers but no
    # act headers, chunks start at scenes instead of chapters/acts/subtitles.
    tags = [node.tag for node in main_text]
    has_scene = 'naglowek_scena' in tags
    has_act = 'naglowek_akt' in tags
    if has_scene and not has_act:
        chunk_starters = ("naglowek_scena",)
    else:
        chunk_starters = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    # True right after a part header: a chunk-starting header directly
    # following it stays in the same chunk.
    after_part_header = False
    for node in main_text:
        tag = node.tag
        if tag == 'naglowek_czesc':
            yield part_xml
            after_part_header = True
            master[:] = [deepcopy(node)]
        elif not after_part_header and tag in chunk_starters:
            yield part_xml
            master[:] = [deepcopy(node)]
        else:
            master.append(deepcopy(node))
            after_part_header = False
    yield part_xml
380
381
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters

    chunk_xml: chunk as produced by chop(); its first child holds the content
    chunk_no: number of the chunk, used in 'part<N>.html' TOC targets
    annotations: element collecting the annotations found in the chunk
    empty: if true, the chunk content is not transformed; the contents of
        'epub/emptyChunk.html' is returned instead, with no used characters
    _empty_html_static: internal cache for the empty chunk template; the
        mutable default is intentional, so that the file is read only once
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # remembered on the element as its 'sub' attribute
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # close the template file as soon as it has been read
            with open(get_resource('epub/emptyChunk.html')) as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
411
412
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    wldoc: the document to convert; it is deep-copied first, so the
        object passed in is not modified
    verbose: print a message and show the font optimizer output
    style: path to the CSS file stored as OPS/style.css
        (default: the 'epub/style.css' resource)
    html_toc: also generate toc.html and list it in the spine and guide
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy;
        each flag is set to 'yes' as an attribute of the document root,
        'without-fonts' additionally skips embedding the fonts
    hyphenate: insert soft hyphens, using the document language
    ilustr_path: directory whose files are all stored in OPS/ and listed
        in the manifest
    output_type: passed as 'outputtype' to the title page and last page
        stylesheets

    Returns an OutputFile created from the name of a temporary file.
    """

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children

        Returns the TOC of the file, the next free chunk number, the set
        of characters used and the remaining sample counter.
        """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                with open(get_resource('epub/emptyChunk.html')) as f:
                    html_string = f.read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # sample quota used up: remaining chunks stay empty
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    # NOTE: the local name `zip` shadows the builtin within this function;
    # it is also used by the nested transform_file() above.
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    # the mimetype entry is stored without compression
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = DefaultEbookCover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html')) as f:
        html_string = f.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # strip fonts
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        try:
            # subset.pl is run from its own directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call)
                else:
                    with open(os.devnull, 'w') as dev_null:
                        subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null)
                zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # clean up and go back even if the optimizer failed
            try:
                rmtree(tmpdir)
            finally:
                if cwd is not None:
                    os.chdir(cwd)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # The title is plain text: set it as element text rather than parsing
    # it as XML, so that '&' or '<' in a title cannot break the NCX file.
    doc_title_text = toc_file.makeelement(NCXNS('text'))
    doc_title_text.text = title
    toc_file[1].append(doc_title_text)

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()
    # ZipFile does not close a file object it was given, so do it here.
    output_file.close()

    return OutputFile.from_filename(output_file.name)