fix for newlines in epub
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from mimetypes import guess_type
15
16 from lxml import etree
17 import zipfile
18 from tempfile import mkdtemp, NamedTemporaryFile
19 from shutil import rmtree
20
21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
22 from librarian.cover import make_cover
23
24 from librarian import functions, get_resource
25
26 from librarian.hyphenator import Hyphenator
27
# Register helpers from librarian.functions at import time.
# NOTE(review): presumably these are XPath/XSLT extension functions used by
# the epub stylesheets -- confirm against librarian/functions.py.
functions.reg_person_name()
functions.reg_lang_code_3to2()
30
31
def squeeze_whitespace(s):
    """ Collapse every run of whitespace in `s` into a single space. """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', s)
34
35
def set_hyph_language(source_tree):
    """ Build a Hyphenator for the language declared in the document.

    The language is read from the first text node found under any
    dc:language element of `source_tree` and translated with the
    'res/ISO-639-2_8859-1.txt' table before the matching
    'res/hyph-dictionaries/hyph_<code>.dic' dictionary is loaded.

    Returns the Hyphenator, or None when the document declares no
    language or the dictionary cannot be loaded.
    """
    def get_short_lng_code(text):
        """ Translate a language code using the '|'-separated table.

        A row matches when its first field equals `text`; the third field
        of the (last) matching row is returned.  When nothing matches, or
        the third field is empty, `text` is returned unchanged.
        """
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                if fields[0] == text:
                    result = fields[2]
        if result == '':
            return text
        return result

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No language declared: nothing to hyphenate with.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Best effort: a missing or unreadable dictionary simply disables
        # hyphenation.  KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
57
58
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ Hyphenate the text and glue one-letter words to the next word.

    Works in place on every text node below the second child of the root
    `utwor` element.

    hyph: an object with an `inserted(word, hyphen)` method, or None to
        skip hyphenation.  When given, each word gets soft hyphens
        (U+00AD) inserted.

    Independently of `hyph`, whitespace following a single word character
    that is itself preceded by whitespace is replaced with a no-break
    space (U+00A0).
    """
    # Both patterns are loop-invariant, so they are compiled once.
    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    orphan_gap = re.compile(r'(?<=\s\w)\s+')

    texts = etree.XPath('/utwor/*[2]//text()')(source_tree)
    for t in texts:
        parent = t.getparent()
        if hyph is not None:
            newt = ''.join(hyph.inserted(w, u'\u00AD')
                           for w in tokenizer.findall(t))
        else:
            newt = t
        newt = orphan_gap.sub(u'\u00A0', newt)
        # The XPath result remembers whether it was a text or a tail.
        if t.is_text:
            parent.text = newt
        elif t.is_tail:
            parent.tail = newt
75
76
def inner_xml(node):
    """ Serialize a node's content: its leading text followed by its children.

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    pieces = ['' if node.text is None else node.text]
    pieces.extend([etree.tostring(child) for child in node])
    return ''.join(pieces)
86
87
def set_inner_xml(node, text):
    """ Replace a node's text and children with the parsed XML fragment `text`

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    # Parse the fragment inside a throw-away wrapper element.
    wrapper = etree.fromstring('<x>%s</x>' % text)
    node.text = wrapper.text
    node[:] = list(wrapper)
100
101
def node_name(node):
    """ Return the plain-text name of a node

    Works on a copy of the node: annotation and theme elements
    (pe, pa, pt, pr, motyw) are emptied, keeping only their tails, and
    all remaining markup is stripped.

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    scratch = deepcopy(node)

    for tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for found in scratch.findall('.//%s' % tag):
            # clear() also drops the tail, so put it back afterwards
            kept_tail = found.tail
            found.clear()
            found.tail = kept_tail
    etree.strip_tags(scratch, '*')
    return scratch.text
118
119
def xslt(xml, sheet, **kwargs):
    """ Apply the XSLT stylesheet file `sheet` to `xml`

    A bare element is wrapped in an ElementTree first.  Keyword arguments
    are passed to the stylesheet as string parameters.
    """
    source = etree.ElementTree(xml) if isinstance(xml, etree._Element) else xml
    with open(sheet) as sheet_file:
        transform = etree.XSLT(etree.parse(sheet_file))
        params = {}
        for key, value in kwargs.iteritems():
            params[key] = transform.strparam(value)
        return transform(source, **params)
127
128
def replace_characters(node):
    """ Apply typographic replacements to a whole subtree, in place

    Contents of `uwaga` and `extra` elements are dropped (their tails are
    kept).  In every text and tail the byte-order mark is removed and
    ASCII dashes and quotes are replaced with typographic characters.
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def fix(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        kept_tail = node.tail
        node.clear()
        node.tail = kept_tail
    node.text = fix(node.text)
    node.tail = fix(node.tail)
    for sub in node:
        replace_characters(sub)
147
148
def find_annotations(annotations, source, part_no):
    """ Move annotations found under `source` into `annotations`

    Every pe/pa/pt/pr element is copied into `annotations` with `number`
    and `part` attributes set, and the element left in the text is
    reduced to that number (its tail is kept).  The contents of `extra`
    and `uwaga` elements are not searched.
    """
    note_tags = ('pe', 'pa', 'pt', 'pr')
    skipped_tags = ('extra', 'uwaga')

    for child in source:
        if child.tag in note_tags:
            number = str(len(annotations) + 1)

            note = deepcopy(child)
            note.set('number', number)
            note.set('part', str(part_no))
            note.tail = ''
            annotations.append(note)

            # leave only the reference number in the text
            kept_tail = child.tail
            child.clear()
            child.tail = kept_tail
            child.text = number
        if child.tag in skipped_tags:
            continue
        find_annotations(annotations, child, part_no)
164
165
class Stanza(object):
    """
    Splits a stanza into verse elements at "/" line endings.

    Only slashes found directly in the stanza's own text count as verse
    breaks. A slash inside a subelement is left untouched, and the
    subelement itself is placed inside a verse element.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verse elements collected so far, in document order
        self.verses = []
        # the verse currently receiving content, if any
        self.open_verse = None

    def versify(self):
        """ Rebuild the stanza in place as a sequence of verse elements. """
        self.push_text(self.stanza.text)
        for child in self.stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() drops the tail as well, so it is saved and restored
        saved_tail = self.stanza.tail
        self.stanza.clear()
        self.stanza.tail = saved_tail
        self.stanza.extend(self.verses)

    def open_normal_verse(self):
        """ Start a new, empty wers_normalny verse and make it current. """
        fresh = self.stanza.makeelement("wers_normalny")
        self.open_verse = fresh
        self.verses.append(fresh)

    def get_open_verse(self):
        """ Return the current verse, starting a normal one if needed. """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ Add text to the stanza, starting a new verse after each "/" ending. """
        if not text:
            return
        pieces = re.split(r"/\s*\n", text)
        for index, piece in enumerate(pieces):
            if index > 0:
                # every piece after the first one follows a verse break
                self.open_normal_verse()
            if piece.strip():
                target = self.get_open_verse()
                if len(target) > 0:
                    last_child = target[-1]
                    last_child.tail = (last_child.tail or "") + piece
                else:
                    target.text = (target.text or "") + piece

    def push_elem(self, elem):
        """ Add a copy of a subelement; "wers*" elements become verses themselves. """
        is_verse = elem.tag.startswith("wers")
        clone = deepcopy(elem)
        clone.tail = None
        if is_verse:
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
228
229
def replace_by_verse(tree):
    """ Turn '/' verse endings into verse elements in every stanza of the tree """

    for stanza in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza).versify()
236
237
def add_to_manifest(manifest, partno):
    """ Appends an item for part number `partno` to the manifest of content.opf """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    item = manifest.makeelement(OPFNS('item'), attrib=attributes)
    manifest.append(item)
247
248
def add_to_spine(spine, partno):
    """ Appends an itemref for part number `partno` to the spine of content.opf """

    attributes = {'idref': 'part%d' % partno}
    itemref = spine.makeelement(OPFNS('itemref'), attrib=attributes)
    spine.append(itemref)
254
255
class TOC(object):
    """ A node of the table-of-contents tree.

    Each node has a display name, the href of the part file it points to,
    an optional sub-section number (rendered as a '#sub<N>' fragment) and
    a list of child nodes.
    """
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ Add a new entry to the tree.

        level: how many levels below this node the entry belongs; it is
            placed under the most recently added child on each level.
            When there are no children to descend into, the entry is
            added right here.
        is_part: when False, the entry is a sub-section of a part: it gets
            a sub-section number, which is returned.
        index: position among this node's children (only for level 0);
            by default the entry is appended.

        Returns the sub-section number for is_part=False, None otherwise.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name)
            t.part_href = part_href
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                # counted after the entry itself has been added
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ Add a whole TOC node as the last child. """
        self.children.append(toc)

    def extend(self, toc):
        """ Add all children of another TOC node as children of this one. """
        self.children.extend(toc.children)

    def depth(self):
        """ Return the number of levels below this node. """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ Return the link target, with a '#sub<N>' fragment for sub-sections. """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ Append NCX navPoint elements for all descendants to `nav_map`.

        counter: number used for the id and playOrder of the first entry.

        Returns the next unused counter value.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                # labels are kept on a single line
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ Return the HTML fragment listing all descendants.

        Every entry is a div indented by its depth, containing a link.
        Entry names are plain text, so they are escaped before being put
        into the markup; an entry without a name gets an empty label.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            label = escape(child.name) if child.name is not None else ''
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ Return the complete HTML table of contents.

        The 'epub/toc.html' resource is used as a %-format template.
        """
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
331
332
def used_chars(element):
    """ Returns the set of characters used in an ETree Element's texts and tails """
    found = set((element.text or '') + (element.tail or ''))
    for sub in element:
        found |= used_chars(sub)
    return found
339
340
def chop(main_text):
    """ split the main content of the XML file into chunks

    Yields an `utwor` element with a single `master` child holding copies
    of the elements of one chunk.  The same container object is yielded
    every time and refilled afterwards, so each chunk has to be used up
    before asking for the next one.
    """

    # one reusable container for all chunks
    part_xml = etree.Element('utwor')
    main_xml_part = etree.SubElement(part_xml, 'master')

    # workaround for a problem with epubs in drama ebooks without acts:
    # when there are scenes but no acts, chunks are split on scenes
    tags = [one_part.tag for one_part in main_text]
    if 'naglowek_scena' in tags and 'naglowek_akt' not in tags:
        chunk_starters = ("naglowek_scena",)
    else:
        chunk_starters = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    # True right after a part header, so that a header directly
    # following it stays in the same chunk
    last_node_part = False

    for one_part in main_text:
        name = one_part.tag
        if name == 'naglowek_czesc':
            yield part_xml
            last_node_part = True
            main_xml_part[:] = [deepcopy(one_part)]
        elif not last_node_part and name in chunk_starters:
            yield part_xml
            main_xml_part[:] = [deepcopy(one_part)]
        else:
            main_xml_part.append(deepcopy(one_part))
            last_node_part = False
    yield part_xml
386
387
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters

    chunk_xml: the chunk; the headers to index are read from its first child.
    chunk_no: number of the chunk, used to build 'part<N>.html' links.
    annotations: container collecting the annotations found in the chunk.
    empty: when True, the chunk content is not transformed and the
        'epub/emptyChunk.html' placeholder is returned instead.
    _empty_html_static: not meant for callers; the shared default list is
        used on purpose, as a cache keeping the placeholder between calls.
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # stored on the element as its 'sub' attribute
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # read the placeholder once, closing the file right away
            with open(get_resource('epub/emptyChunk.html')) as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
417
418
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
    """ produces an EPUB file

    wldoc: the source document; it is deep-copied, the original is not modified
    verbose: print a message and show the output of the font optimizer
    style: path to a CSS file to use instead of the default epub/style.css
    html_toc: also generate a HTML table of contents (toc.html)
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: hyphenate the text using a dictionary for its language
    ilustr_path: directory with files to include in the e-book
    output_type: passed to the title page and last page stylesheets
        as the `outputtype` parameter

    Returns an OutputFile created from the name of the generated file.
    """
    from xml.sax.saxutils import escape

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            html_string = etree.tostring(
                html_tree, pretty_print=True, xml_declaration=True,
                encoding="utf-8",
                doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                        ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
            )
            zip.writestr('OPS/title.html', squeeze_whitespace(html_string))
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                with open(get_resource('epub/emptyChunk.html')) as empty_file:
                    html_string = empty_file.read()
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(html_string))
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    # once the sample quota is used up, chunks are left empty
                    if sample <= 0:
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, squeeze_whitespace(chunk_html))
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    # work on a copy, the caller's document stays untouched
    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = make_cover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    with open(get_resource('epub/support.html')) as support_file:
        html_string = support_file.read()
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', squeeze_whitespace(html_string))

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', squeeze_whitespace(etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    )))

    if not flags or 'without-fonts' not in flags:
        # strip fonts: only the characters actually used are kept
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        try:
            # the optimizer script is run from its own directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  ''.join(chars).encode('utf-8'),
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                env = {"PERL_USE_UNSAFE_INC": "1"}
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call, env=env)
                else:
                    with open(os.devnull, 'w') as dev_null:
                        subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
                zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # restore the working directory and clean up
            # even when the optimizer fails
            if cwd is not None:
                os.chdir(cwd)
            rmtree(tmpdir)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    # the title is plain text spliced into markup, so it has to be escaped
    set_inner_xml(toc_file[1], ''.join(('<text>', escape(title), '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()
    # ZipFile does not close a file object it was given
    output_file.close()

    return OutputFile.from_filename(output_file.name)