7fd6dafdbd4dd6646d4901b767164b1633dc9fc9
[librarian.git] / librarian / epub.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import with_statement
7
8 import os
9 import os.path
10 import re
11 import subprocess
12 from StringIO import StringIO
13 from copy import deepcopy
14 from mimetypes import guess_type
15
16 from lxml import etree
17 import zipfile
18 from tempfile import mkdtemp, NamedTemporaryFile
19 from shutil import rmtree
20
21 from librarian import RDFNS, WLNS, NCXNS, OPFNS, XHTMLNS, DCNS, OutputFile
22 from librarian.cover import make_cover
23
24 from librarian import functions, get_resource
25
26 from librarian.hyphenator import Hyphenator
27
# Import-time side effect: run two registration hooks from
# librarian.functions. NOTE(review): judging by their names they presumably
# register the person-name and 3-to-2 letter language-code helpers used by
# the stylesheets -- confirm in librarian/functions.py.
functions.reg_person_name()
functions.reg_lang_code_3to2()
30
31
def set_hyph_language(source_tree):
    """ builds a Hyphenator for the language declared in the document

    Takes the first dc:language text found in source_tree, maps it through
    the table in res/ISO-639-2_8859-1.txt (third '|'-separated column of the
    row whose first column equals the declared code; the declared code
    itself is used when no row matches or the column is empty) and loads
    res/hyph-dictionaries/hyph_<code>.dic.

    Returns the Hyphenator, or None when the document declares no language
    or the dictionary cannot be loaded.
    """

    def get_short_lng_code(text):
        """ maps a language code to its short form, or returns it unchanged """
        result = ''
        text = ''.join(text)
        with open(get_resource('res/ISO-639-2_8859-1.txt'), 'rb') as f:
            for line in f:
                fields = line.strip().split('|')
                # The last matching row wins. Rows too short to hold a third
                # column are skipped instead of raising IndexError.
                if len(fields) > 2 and fields[0] == text:
                    result = fields[2]
        if result == '':
            return text
        return result

    bibl_lng = etree.XPath('//dc:language//text()',
                           namespaces={'dc': str(DCNS)})(source_tree)
    if not bibl_lng:
        # No language declared: there is nothing to choose a dictionary by.
        return None
    short_lng = get_short_lng_code(bibl_lng[0])
    try:
        return Hyphenator(get_resource('res/hyph-dictionaries/hyph_' +
                                       short_lng + '.dic'))
    except Exception:
        # Hyphenation is best effort: a missing or unreadable dictionary
        # means "do not hyphenate". KeyboardInterrupt and SystemExit are no
        # longer swallowed here.
        return None
53
54
def hyphenate_and_fix_conjunctions(source_tree, hyph):
    """ hyphenates text nodes and glues one-letter words to their successor

    Works on every text node selected by /utwor/*[2]//text(). When hyph is
    not None, each token of the text (a run of word characters or a single
    non-word character) goes through hyph.inserted() with a soft hyphen
    (U+00AD) as the separator. Afterwards any whitespace that follows a
    single word character preceded by whitespace is replaced by a
    no-break space (U+00A0). The result is written back to the text or tail
    of the owning element.
    """
    tokenizer = re.compile(r'\w+|[^\w]', re.UNICODE)
    for fragment in etree.XPath('/utwor/*[2]//text()')(source_tree):
        owner = fragment.getparent()
        if hyph is None:
            processed = fragment
        else:
            processed = ''.join(
                [hyph.inserted(token, u'\u00AD')
                 for token in tokenizer.findall(fragment)])
        processed = re.sub(r'(?<=\s\w)\s+', u'\u00A0', processed)
        if fragment.is_text:
            owner.text = processed
        elif fragment.is_tail:
            owner.tail = processed
71
72
def inner_xml(node):
    """ serializes a node's content (leading text plus children) to a string

    The node's own tag is not included; every child is serialized with
    etree.tostring().

    >>> print inner_xml(etree.fromstring('<a>x<b>y</b>z</a>'))
    x<b>y</b>z
    """

    leading = '' if node.text is None else node.text
    serialized_children = [etree.tostring(child) for child in node]
    return ''.join([leading] + serialized_children)
82
83
def set_inner_xml(node, text):
    """ replaces a node's text and children with a parsed XML fragment

    The fragment is wrapped in a throw-away <x> element so that it can be
    parsed; that wrapper's text and children are then moved into the node.

    >>> e = etree.fromstring('<a>b<b>x</b>x</a>')
    >>> set_inner_xml(e, 'x<b>y</b>z')
    >>> print etree.tostring(e)
    <a>x<b>y</b>z</a>
    """

    wrapper = etree.fromstring('<x>%s</x>' % text)
    node.text = wrapper.text
    node[:] = wrapper[:]
96
97
def node_name(node):
    """ extracts the plain-text name of a node

    Operates on a deep copy, so the node itself is left untouched. The
    content of pe, pa, pt, pr and motyw descendants is dropped (their tails
    are kept), then all remaining markup is stripped and the resulting text
    is returned.

    >>> print node_name(etree.fromstring('<a>X<b>Y</b>Z</a>'))
    XYZ
    """

    scratch = deepcopy(node)

    for ignored_tag in ('pe', 'pa', 'pt', 'pr', 'motyw'):
        for hit in scratch.findall('.//%s' % ignored_tag):
            # clear() also wipes the tail, which belongs to the parent's text
            kept_tail = hit.tail
            hit.clear()
            hit.tail = kept_tail
    etree.strip_tags(scratch, '*')
    return scratch.text
114
115
def xslt(xml, sheet, **kwargs):
    """ applies the XSLT stylesheet stored in file `sheet` to `xml`

    A bare element is wrapped in an ElementTree first. Keyword arguments are
    passed to the stylesheet as string parameters. Returns the result of the
    transformation.
    """
    document = xml
    if isinstance(document, etree._Element):
        document = etree.ElementTree(document)
    with open(sheet) as stylesheet_file:
        stylesheet = etree.XSLT(etree.parse(stylesheet_file))
        string_params = {}
        for name, raw_value in kwargs.iteritems():
            string_params[name] = stylesheet.strparam(raw_value)
        return stylesheet(document, **string_params)
123
124
def replace_characters(node):
    """ applies typographic replacements to a node and all its descendants

    'uwaga' and 'extra' nodes are emptied first (their tail is kept). Then,
    in text and tail alike: the BOM character is removed, '---' becomes an
    em dash, '--' an en dash, ',,' a low double quote, '"' a right double
    quote and "'" a right single quote. The tree is modified in place.
    """
    # Order matters: '---' has to be handled before '--'.
    substitutions = (
        (u"\ufeff", u""),
        ("---", u"\u2014"),
        ("--", u"\u2013"),
        (",,", u"\u201E"),
        ('"', u"\u201D"),
        ("'", u"\u2019"),
    )

    def replace_chars(text):
        if text is None:
            return None
        for old, new in substitutions:
            text = text.replace(old, new)
        return text

    if node.tag in ('uwaga', 'extra'):
        kept_tail = node.tail
        node.clear()
        node.tail = kept_tail
    node.text = replace_chars(node.text)
    node.tail = replace_chars(node.tail)
    for descendant in node:
        replace_characters(descendant)
143
144
def find_annotations(annotations, source, part_no):
    """ collects footnotes found below `source` into `annotations`

    Every pe, pa, pt or pr descendant is deep-copied into `annotations` with
    'number' (1-based position in `annotations`) and 'part' (part_no)
    attributes and an empty tail. The original node is emptied, keeps its
    tail and gets the number as its text. The content of 'extra' and 'uwaga'
    nodes is not searched.
    """
    for node in source:
        if node.tag in ('pe', 'pa', 'pt', 'pr'):
            ordinal = str(len(annotations) + 1)

            note = deepcopy(node)
            note.tail = ''
            note.set('number', ordinal)
            note.set('part', str(part_no))
            annotations.append(note)

            # leave only the reference number in the running text
            kept_tail = node.tail
            node.clear()
            node.text = ordinal
            node.tail = kept_tail
        if node.tag not in ('extra', 'uwaga'):
            find_annotations(annotations, node, part_no)
160
161
class Stanza(object):
    """
    Turns '/'-terminated lines of a stanza into verse elements.

    Only slashes placed directly in the stanza's own text (or in the tails of
    its children) end a verse. Slashes inside subelements are left alone and
    the subelements are copied into the verse that is open at that point.
    Children whose tag starts with "wers" become verses of their own.

    >>> s = etree.fromstring("<strofa>a <b>c</b> <b>c</b>/\\nb<x>x/\\ny</x>c/ \\nd</strofa>")
    >>> Stanza(s).versify()
    >>> print etree.tostring(s)
    <strofa><wers_normalny>a <b>c</b> <b>c</b></wers_normalny><wers_normalny>b<x>x/
    y</x>c</wers_normalny><wers_normalny>d</wers_normalny></strofa>

    """
    def __init__(self, stanza_elem):
        self.stanza = stanza_elem
        # verse elements built so far, in document order
        self.verses = []
        # the verse currently receiving content, if any
        self.open_verse = None

    def versify(self):
        """ rebuilds the stanza in place as a sequence of verse elements """
        stanza = self.stanza
        self.push_text(stanza.text)
        for child in stanza:
            self.push_elem(child)
            self.push_text(child.tail)
        # clear() would drop the tail, which has to survive the rebuild
        kept_tail = stanza.tail
        stanza.clear()
        stanza.tail = kept_tail
        stanza.extend(self.verses)

    def open_normal_verse(self):
        """ starts a new wers_normalny verse and makes it the open one """
        fresh = self.stanza.makeelement("wers_normalny")
        self.verses.append(fresh)
        self.open_verse = fresh

    def get_open_verse(self):
        """ returns the open verse, starting a normal one when needed """
        if self.open_verse is None:
            self.open_normal_verse()
        return self.open_verse

    def push_text(self, text):
        """ adds text to the stanza, opening a new verse after each '/' + newline """
        if not text:
            return
        fragments = re.split(r"/\s*\n", text)
        for position, fragment in enumerate(fragments):
            if position > 0:
                # every fragment after the first follows a verse ending
                self.open_normal_verse()
            if fragment.strip():
                target = self.get_open_verse()
                if len(target) > 0:
                    last_child = target[-1]
                    last_child.tail = (last_child.tail or "") + fragment
                else:
                    target.text = (target.text or "") + fragment

    def push_elem(self, elem):
        """ adds a copy of a child element, either as a verse or into the open verse """
        is_verse = elem.tag.startswith("wers")
        clone = deepcopy(elem)
        # the tail is fed separately through push_text()
        clone.tail = None
        if is_verse:
            self.verses.append(clone)
            self.open_verse = clone
        else:
            self.get_open_verse().append(clone)
224
225
def replace_by_verse(tree):
    """ Versify every stanza below `tree`, turning '/' line ends into verses """

    for stanza in tree.findall('.//' + WLNS('strofa')):
        Stanza(stanza).versify()
232
233
def add_to_manifest(manifest, partno):
    """ Appends an XHTML item for part number `partno` to the manifest of content.opf """

    part_id = 'part%d' % partno
    attributes = {
        'id': part_id,
        'href': part_id + '.html',
        'media-type': 'application/xhtml+xml',
    }
    manifest.append(manifest.makeelement(OPFNS('item'), attrib=attributes))
243
244
def add_to_spine(spine, partno):
    """ Appends an itemref for part number `partno` to the spine of content.opf """

    attributes = {'idref': 'part%d' % partno}
    spine.append(spine.makeelement(OPFNS('itemref'), attrib=attributes))
250
251
class TOC(object):
    """ A table-of-contents tree.

    Each node has a display name, the href of the file it points to, an
    optional sub-section number (rendered as a '#subN' fragment) and a list
    of child nodes. The root node only serves as a container: the rendering
    methods output its descendants, not the root itself.
    """
    def __init__(self, name=None, part_href=None):
        self.children = []
        self.name = name
        self.part_href = part_href
        self.sub_number = None

    def add(self, name, part_href, level=0, is_part=True, index=None):
        """ adds an entry `level` levels below this node

        For level > 0 the entry goes under the most recently added child,
        as long as there is one; otherwise it is added right here. `index`
        (only allowed with level 0) inserts at a position instead of
        appending.

        When is_part is False the entry is a sub-section: it gets a
        sub_number, which is also returned. Otherwise None is returned.
        """
        assert level == 0 or index is None
        if level > 0 and self.children:
            return self.children[-1].add(name, part_href, level - 1, is_part)
        else:
            t = TOC(name, part_href)
            if index is not None:
                self.children.insert(index, t)
            else:
                self.children.append(t)
            if not is_part:
                t.sub_number = len(self.children) + 1
                return t.sub_number

    def append(self, toc):
        """ adds a whole TOC tree as the last child """
        self.children.append(toc)

    def extend(self, toc):
        """ adopts the children of another TOC (its root is dropped) """
        self.children.extend(toc.children)

    def depth(self):
        """ returns the number of levels below this node (0 for a leaf) """
        if self.children:
            return max((c.depth() for c in self.children)) + 1
        else:
            return 0

    def href(self):
        """ returns the link target, including '#subN' for sub-sections """
        src = self.part_href
        if self.sub_number is not None:
            src += '#sub%d' % self.sub_number
        return src

    def write_to_xml(self, nav_map, counter=1):
        """ writes the descendants as nested NCX navPoint elements

        `counter` numbers the navPoints (id and playOrder) in document
        order; the next free number is returned.
        """
        for child in self.children:
            nav_point = nav_map.makeelement(NCXNS('navPoint'))
            nav_point.set('id', 'NavPoint-%d' % counter)
            nav_point.set('playOrder', str(counter))

            nav_label = nav_map.makeelement(NCXNS('navLabel'))
            text = nav_map.makeelement(NCXNS('text'))
            if child.name is not None:
                # labels are kept on a single line
                text.text = re.sub(r'\n', ' ', child.name)
            else:
                text.text = child.name
            nav_label.append(text)
            nav_point.append(nav_label)

            content = nav_map.makeelement(NCXNS('content'))
            content.set('src', child.href())
            nav_point.append(content)
            nav_map.append(nav_point)
            counter = child.write_to_xml(nav_point, counter + 1)
        return counter

    def html_part(self, depth=0):
        """ renders the descendants as indented HTML links, one div each

        Entry names are plain text, so they are escaped before being put
        into the markup; an entry without a name gets an empty label.
        """
        from xml.sax.saxutils import escape

        texts = []
        for child in self.children:
            label = escape(child.name) if child.name is not None else ''
            texts.append(
                "<div style='margin-left:%dem;'><a href='%s'>%s</a></div>" %
                (depth, child.href(), label))
            texts.append(child.html_part(depth + 1))
        return "\n".join(texts)

    def html(self):
        """ returns the HTML table of contents built from the epub/toc.html template """
        with open(get_resource('epub/toc.html')) as f:
            t = unicode(f.read(), 'utf-8')
        return t % self.html_part()
327
328
def used_chars(element):
    """ Returns the set of characters found in the text and tail of an
    ETree Element and of all its descendants """
    own_text = (element.text or '') + (element.tail or '')
    found = set(own_text)
    for child in element:
        found |= used_chars(child)
    return found
335
336
def chop(main_text):
    """ divides the main content of the XML file into chunks

    A generator yielding an 'utwor' element with a single 'master' child
    that holds deep copies of the nodes of the current chunk. The very same
    element object is yielded every time and refilled in between, so each
    chunk has to be consumed before the generator is advanced.

    A chunk ends before every naglowek_czesc. It also ends before a chapter
    level header, unless that header directly follows a naglowek_czesc.
    """

    # container reused for every chunk
    chunk_root = etree.Element('utwor')
    master = etree.SubElement(chunk_root, 'master')

    # Workaround for dramas without acts: when the text has scene headers
    # but no act headers, chunks are split on scenes instead of on
    # chapters / acts / section titles.
    tags = [node.tag for node in main_text]
    has_scene = 'naglowek_scena' in tags
    has_act = 'naglowek_akt' in tags
    if has_scene and not has_act:
        split_tags = ("naglowek_scena",)
    else:
        split_tags = ("naglowek_rozdzial", "naglowek_akt", "srodtytul")

    after_part_header = False
    for node in main_text:
        tag = node.tag
        if tag == 'naglowek_czesc':
            yield chunk_root
            after_part_header = True
            master[:] = [deepcopy(node)]
        elif not after_part_header and tag in split_tags:
            yield chunk_root
            master[:] = [deepcopy(node)]
        else:
            master.append(deepcopy(node))
            after_part_header = False
    yield chunk_root
382
383
def transform_chunk(chunk_xml, chunk_no, annotations, empty=False, _empty_html_static=[]):
    """ transforms one chunk, returns a HTML string, a TOC object and a set of used characters

    chunk_xml: a chunk as produced by chop(); its first child holds the content
    chunk_no: number of the chunk, used to build the part%d.html hrefs
    annotations: element collecting the footnotes found in the chunk
    empty: when true, the placeholder page epub/emptyChunk.html is returned
        instead of the transformed content (the TOC is still built)
    _empty_html_static: not meant to be passed; the shared default list
        deliberately caches the placeholder page between calls
    """

    toc = TOC()
    for element in chunk_xml[0]:
        if element.tag == "naglowek_czesc":
            toc.add(node_name(element), "part%d.html#book-text" % chunk_no)
        elif element.tag in ("naglowek_rozdzial", "naglowek_akt", "srodtytul"):
            toc.add(node_name(element), "part%d.html" % chunk_no)
        elif element.tag in ('naglowek_podrozdzial', 'naglowek_scena'):
            subnumber = toc.add(node_name(element), "part%d.html" % chunk_no, level=1, is_part=False)
            # the sub-section number is stored on the element under 'sub'
            element.set('sub', str(subnumber))
    if empty:
        if not _empty_html_static:
            # read once and close the file right away
            with open(get_resource('epub/emptyChunk.html')) as f:
                _empty_html_static.append(f.read())
        chars = set()
        output_html = _empty_html_static[0]
    else:
        find_annotations(annotations, chunk_xml, chunk_no)
        replace_by_verse(chunk_xml)
        html_tree = xslt(chunk_xml, get_resource('epub/xsltScheme.xsl'))
        chars = used_chars(html_tree.getroot())
        output_html = etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        )
    return output_html, toc, chars
413
414
def transform(wldoc, verbose=False, style=None, html_toc=False,
              sample=None, cover=None, flags=None, hyphenate=False, ilustr_path='', output_type='epub'):
    """ produces a EPUB file

    wldoc: the document to convert; it is deep-copied, the original is not modified
    verbose: do not silence the output of the font optimizer
    style: path of the CSS file to embed (default: epub/style.css)
    html_toc: also generate a HTML table of contents (toc.html)
    sample=n: generate sample e-book (with at least n paragraphs)
    cover: a cover.Cover factory or True for default
    flags: less-advertising, without-fonts, working-copy
    hyphenate: insert soft hyphens using a dictionary for the document's language
    ilustr_path: directory whose files are all copied into the book
    output_type: passed to the title and last page stylesheets as 'outputtype'

    Returns an OutputFile created from the name of the temporary file the
    book was written to.
    """

    def read_resource(name):
        """ returns the content of a bundled resource, closing the file afterwards """
        with open(get_resource(name)) as f:
            return f.read()

    def transform_file(wldoc, chunk_counter=1, first=True, sample=None):
        """ processes one input file and proceeds to its children """

        replace_characters(wldoc.edoc.getroot())

        hyphenator = set_hyph_language(wldoc.edoc.getroot()) if hyphenate else None
        hyphenate_and_fix_conjunctions(wldoc.edoc.getroot(), hyphenator)

        # every input file will have a TOC entry,
        # pointing to starting chunk
        toc = TOC(wldoc.book_info.title, "part%d.html" % chunk_counter)
        chars = set()
        if first:
            # write book title page
            html_tree = xslt(wldoc.edoc, get_resource('epub/xsltTitle.xsl'), outputtype=output_type)
            chars = used_chars(html_tree.getroot())
            zip.writestr(
                'OPS/title.html',
                etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
            )
            # add a title page TOC entry
            toc.add(u"Strona tytułowa", "title.html")
        elif wldoc.book_info.parts:
            # write title page for every parent
            if sample is not None and sample <= 0:
                chars = set()
                html_string = read_resource('epub/emptyChunk.html')
            else:
                html_tree = xslt(wldoc.edoc, get_resource('epub/xsltChunkTitle.xsl'))
                chars = used_chars(html_tree.getroot())
                html_string = etree.tostring(
                    html_tree, pretty_print=True, xml_declaration=True,
                    encoding="utf-8",
                    doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"' +
                            ' "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
                )
                # LF -> CRLF. html_string is a UTF-8 byte string, so the
                # pattern has to be a byte string too: mixing in unicode
                # patterns forces an implicit ASCII decode of the page.
                html_string = re.sub(br'([^\r])\n', br'\1\r\n', html_string)
            zip.writestr('OPS/part%d.html' % chunk_counter, html_string)
            add_to_manifest(manifest, chunk_counter)
            add_to_spine(spine, chunk_counter)
            chunk_counter += 1

        if len(wldoc.edoc.getroot()) > 1:
            # rdf before style master
            main_text = wldoc.edoc.getroot()[1]
        else:
            # rdf in style master
            main_text = wldoc.edoc.getroot()[0]
            if main_text.tag == RDFNS('RDF'):
                main_text = None

        if main_text is not None:
            for chunk_xml in chop(main_text):
                empty = False
                if sample is not None:
                    if sample <= 0:
                        # the sample is complete, the rest becomes placeholders
                        empty = True
                    else:
                        sample -= len(chunk_xml.xpath('//strofa|//akap|//akap_cd|//akap_dialog'))
                chunk_html, chunk_toc, chunk_chars = transform_chunk(chunk_xml, chunk_counter, annotations, empty)

                toc.extend(chunk_toc)
                chars = chars.union(chunk_chars)
                zip.writestr('OPS/part%d.html' % chunk_counter, chunk_html)
                add_to_manifest(manifest, chunk_counter)
                add_to_spine(spine, chunk_counter)
                chunk_counter += 1

        for child in wldoc.parts():
            child_toc, chunk_counter, chunk_chars, sample = transform_file(
                child, chunk_counter, first=False, sample=sample)
            toc.append(child_toc)
            chars = chars.union(chunk_chars)

        return toc, chunk_counter, chars, sample

    # work on a copy, the caller's document stays untouched
    document = deepcopy(wldoc)
    del wldoc

    if flags:
        for flag in flags:
            document.edoc.getroot().set(flag, 'yes')

    document.clean_ed_note()
    document.clean_ed_note('abstrakt')

    # add editors info
    editors = document.editors()
    if editors:
        document.edoc.getroot().set('editors', u', '.join(sorted(
            editor.readable() for editor in editors)))
    if document.book_info.funders:
        document.edoc.getroot().set('funders', u', '.join(
            document.book_info.funders))
    if document.book_info.thanks:
        document.edoc.getroot().set('thanks', document.book_info.thanks)

    opf = xslt(document.book_info.to_etree(), get_resource('epub/xsltContent.xsl'))
    manifest = opf.find('.//' + OPFNS('manifest'))
    guide = opf.find('.//' + OPFNS('guide'))
    spine = opf.find('.//' + OPFNS('spine'))

    output_file = NamedTemporaryFile(prefix='librarian', suffix='.epub', delete=False)
    zip = zipfile.ZipFile(output_file, 'w', zipfile.ZIP_DEFLATED)

    functions.reg_mathml_epub(zip)

    if os.path.isdir(ilustr_path):
        for i, filename in enumerate(os.listdir(ilustr_path)):
            file_path = os.path.join(ilustr_path, filename)
            zip.write(file_path, os.path.join('OPS', filename))
            image_id = 'image%s' % i
            manifest.append(etree.fromstring(
                '<item id="%s" href="%s" media-type="%s" />' % (image_id, filename, guess_type(file_path)[0])))

    # write static elements
    # the mimetype entry is stored uncompressed
    mime = zipfile.ZipInfo()
    mime.filename = 'mimetype'
    mime.compress_type = zipfile.ZIP_STORED
    mime.extra = ''
    zip.writestr(mime, 'application/epub+zip')
    zip.writestr(
        'META-INF/container.xml',
        '<?xml version="1.0" ?>'
        '<container version="1.0" '
        'xmlns="urn:oasis:names:tc:opendocument:xmlns:container">'
        '<rootfiles><rootfile full-path="OPS/content.opf" '
        'media-type="application/oebps-package+xml" />'
        '</rootfiles></container>'
    )
    zip.write(get_resource('res/wl-logo-small.png'),
              os.path.join('OPS', 'logo_wolnelektury.png'))
    zip.write(get_resource('res/jedenprocent.png'),
              os.path.join('OPS', 'jedenprocent.png'))
    if not style:
        style = get_resource('epub/style.css')
    zip.write(style, os.path.join('OPS', 'style.css'))

    if cover:
        if cover is True:
            cover = make_cover

        cover_file = StringIO()
        bound_cover = cover(document.book_info)
        bound_cover.save(cover_file)
        cover_name = 'cover.%s' % bound_cover.ext()
        zip.writestr(os.path.join('OPS', cover_name), cover_file.getvalue())
        del cover_file

        cover_tree = etree.parse(get_resource('epub/cover.html'))
        cover_tree.find('//' + XHTMLNS('img')).set('src', cover_name)
        zip.writestr('OPS/cover.html', etree.tostring(
            cover_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

        if bound_cover.uses_dc_cover:
            if document.book_info.cover_by:
                document.edoc.getroot().set('data-cover-by', document.book_info.cover_by)
            if document.book_info.cover_source:
                document.edoc.getroot().set('data-cover-source', document.book_info.cover_source)

        manifest.append(etree.fromstring(
            '<item id="cover" href="cover.html" media-type="application/xhtml+xml" />'))
        manifest.append(etree.fromstring(
            '<item id="cover-image" href="%s" media-type="%s" />' % (cover_name, bound_cover.mime_type())))
        spine.insert(0, etree.fromstring('<itemref idref="cover"/>'))
        opf.getroot()[0].append(etree.fromstring('<meta name="cover" content="cover-image"/>'))
        guide.append(etree.fromstring('<reference href="cover.html" type="cover" title="Okładka"/>'))

    # footnotes from all chunks are collected here by transform_chunk()
    annotations = etree.Element('annotations')

    toc_file = etree.fromstring(
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE ncx PUBLIC '
        '"-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="pl" '
        'version="2005-1"><head></head><docTitle></docTitle><navMap>'
        '</navMap></ncx>'
    )
    nav_map = toc_file[-1]

    if html_toc:
        manifest.append(etree.fromstring(
            '<item id="html_toc" href="toc.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="html_toc" />'))
        guide.append(etree.fromstring('<reference href="toc.html" type="toc" title="Spis treści"/>'))

    toc, chunk_counter, chars, sample = transform_file(document, sample=sample)

    if len(toc.children) < 2:
        toc.add(u"Początek utworu", "part1.html")

    # Last modifications in container files and EPUB creation
    if len(annotations) > 0:
        toc.add("Przypisy", "annotations.html")
        manifest.append(etree.fromstring(
            '<item id="annotations" href="annotations.html" media-type="application/xhtml+xml" />'))
        spine.append(etree.fromstring(
            '<itemref idref="annotations" />'))
        replace_by_verse(annotations)
        html_tree = xslt(annotations, get_resource('epub/xsltAnnotations.xsl'))
        chars = chars.union(used_chars(html_tree.getroot()))
        zip.writestr('OPS/annotations.html', etree.tostring(
            html_tree, pretty_print=True, xml_declaration=True,
            encoding="utf-8",
            doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                    '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
        ))

    toc.add("Wesprzyj Wolne Lektury", "support.html")
    manifest.append(etree.fromstring(
        '<item id="support" href="support.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="support" />'))
    html_string = read_resource('epub/support.html')
    chars.update(used_chars(etree.fromstring(html_string)))
    zip.writestr('OPS/support.html', html_string)

    toc.add("Strona redakcyjna", "last.html")
    manifest.append(etree.fromstring(
        '<item id="last" href="last.html" media-type="application/xhtml+xml" />'))
    spine.append(etree.fromstring(
        '<itemref idref="last" />'))
    html_tree = xslt(document.edoc, get_resource('epub/xsltLast.xsl'), outputtype=output_type)
    chars.update(used_chars(html_tree.getroot()))
    zip.writestr('OPS/last.html', etree.tostring(
        html_tree, pretty_print=True, xml_declaration=True,
        encoding="utf-8",
        doctype='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ' +
                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    ))

    if not flags or 'without-fonts' not in flags:
        # strip fonts: embed subsets covering only the characters in use
        tmpdir = mkdtemp('-librarian-epub')
        try:
            cwd = os.getcwd()
        except OSError:
            cwd = None

        # the same character list is used for every font
        used_chars_arg = ''.join(chars).encode('utf-8')
        env = {"PERL_USE_UNSAFE_INC": "1"}
        try:
            # subset.pl is run from its own directory
            os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'font-optimizer'))
            for fname in 'DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf', 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf':
                optimizer_call = ['perl', 'subset.pl', '--chars',
                                  used_chars_arg,
                                  get_resource('fonts/' + fname),
                                  os.path.join(tmpdir, fname)]
                if verbose:
                    print("Running font-optimizer")
                    subprocess.check_call(optimizer_call, env=env)
                else:
                    with open(os.devnull, 'w') as dev_null:
                        subprocess.check_call(optimizer_call, stdout=dev_null, stderr=dev_null, env=env)
                zip.write(os.path.join(tmpdir, fname), os.path.join('OPS', fname))
                manifest.append(etree.fromstring(
                    '<item id="%s" href="%s" media-type="application/x-font-truetype" />' % (fname, fname)))
        finally:
            # clean up and go back even when the optimizer fails
            try:
                rmtree(tmpdir)
            finally:
                if cwd is not None:
                    os.chdir(cwd)
    zip.writestr('OPS/content.opf', etree.tostring(opf, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    title = document.book_info.title
    attributes = "dtb:uid", "dtb:depth", "dtb:totalPageCount", "dtb:maxPageNumber"
    for st in attributes:
        meta = toc_file.makeelement(NCXNS('meta'))
        meta.set('name', st)
        meta.set('content', '0')
        toc_file[0].append(meta)
    # dtb:uid and dtb:depth get real values, the page counts stay at 0
    toc_file[0][0].set('content', str(document.book_info.url))
    toc_file[0][1].set('content', str(toc.depth()))
    set_inner_xml(toc_file[1], ''.join(('<text>', title, '</text>')))

    # write TOC
    if html_toc:
        toc.add(u"Spis treści", "toc.html", index=1)
        zip.writestr('OPS/toc.html', toc.html().encode('utf-8'))
    toc.write_to_xml(nav_map)
    zip.writestr('OPS/toc.ncx', etree.tostring(toc_file, pretty_print=True,
                 xml_declaration=True, encoding="utf-8"))
    zip.close()

    return OutputFile.from_filename(output_file.name)
713
714     return OutputFile.from_filename(output_file.name)