1 from datetime import date
4 from ebooklib import epub
7 from librarian import functions, OutputFile, get_resource, XHTMLNS
8 from librarian.cover import make_cover
9 from librarian.embeds.mathml import MathML
11 from librarian.fonts import strip_font
18 self.element = etree.XML('''<html xmlns="http://www.w3.org/1999/xhtml"><head><link rel="stylesheet" href="style.css" type="text/css"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>WolneLektury.pl</title></head><body/></html>''')
22 return self.element.find('.//' + XHTMLNS('title'))
26 return self.element.find('.//' + XHTMLNS('body'))
32 def __init__(self, base_url=None, fundraising=None):
33 self._base_url = base_url or 'file:///home/rczajka/for/fnp/librarian/temp~/maly/img/'
34 self.fundraising = fundraising
35 self.footnotes = etree.Element('div', id='footnotes')
39 # 'header': self.header,
40 'footnotes': self.footnotes,
42 self.current_cursors = []
48 return self.current_cursors[-1]
def enter_fragment(self, fragment):
    """Make the cursor registered under ``fragment`` the current cursor.

    The element stored in ``self.cursors`` for ``fragment`` is pushed onto
    the ``self.current_cursors`` stack.  A ``KeyError`` is raised when no
    such fragment is registered.
    """
    target = self.cursors[fragment]
    self.current_cursors.append(target)
def exit_fragment(self):
    """Leave the current fragment by dropping the top of the cursor stack.

    Raises ``IndexError`` when ``self.current_cursors`` is empty.
    """
    stack = self.current_cursors
    stack.pop()
def create_fragment(self, name, element):
    """Register ``element`` as the cursor for a new fragment ``name``.

    ``name`` must not already be present in ``self.cursors``; this is
    checked with an assertion before the element is stored.
    """
    registry = self.cursors
    assert name not in registry
    registry[name] = element
def forget_fragment(self, name):
    """Remove the fragment ``name`` from the cursor registry.

    Raises ``KeyError`` when ``name`` is not present in ``self.cursors``.
    """
    registry = self.cursors
    del registry[name]
67 if self._base_url is not None:
70 return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)
73 # Base URL should be on Document level, not builder.
def build(self, document, **kwargs):
    """Build the output for ``document``.

    This base implementation always raises ``NotImplementedError``.
    An overriding implementation should return an OutputFile with the
    output.
    """
    raise NotImplementedError()
79 class EpubBuilder(Builder):
80 file_extension = 'epub'
82 def __init__(self, *args, **kwargs):
85 super().__init__(*args, **kwargs)
87 def build(self, document, **kwargs):
88 # replace_characters -- nie, robimy to na poziomie elementów
90 # hyphenator (\00ad w odp. miejscach) -- jeśli już, to też powinno to się dziać na poziomie elementów
91 # spójniki (\u00a0 po)-- jeśli już, to na poziomie elementów
92 # trick na dywizy: ­⁠-
95 # początek z KAŻDEGO PLIKU xml
97 # zliczamy zbiór użytych znaków
100 # mieliśmy taką flagę less-advertising, używaną tylko dla Prestigio; już nie używamy.
102 # @editors = document.editors() (jako str)
103 # @funders = join(meta.funders)
104 # @thanks = meta.thanks
107 self.output = output = epub.EpubBook()
108 self.document = document
115 self.add_title_page()
124 'Początek utworu', # i18n
127 self.output.guide.append({
130 "href": "part1.xhtml"
134 self.build_document(self.document)
139 self.add_annotations()
140 self.add_support_page()
144 e = len(self.output.spine) - 3 - 3
145 nfunds = len(self.fundraising)
149 # COUNTING CHARACTERS?
150 for f in range(nfunds):
151 spine_index = int(4 + (f / nfunds * e) + f)
155 etree.XML('<div id="book-text"><div class="fundraising">' + self.fundraising[f % len(self.fundraising)] + '</div></div>')
157 self.add_html(h.element, file_name='fund%d.xhtml' % f, spine=spine_index)
161 output_file = tempfile.NamedTemporaryFile(
162 prefix='librarian', suffix='.epub',
165 epub.write_epub(output_file.name, output, {'epub3_landmark': False})
166 return OutputFile.from_filename(output_file.name)
168 def build_document(self, document):
169 self.toc_precedences = []
174 document.tree.getroot().epub_build(self)
175 if document.meta.parts:
178 self.start_element('div', {'class': 'title-page'})
179 self.start_element('h1', {'class': 'title'})
180 self.push_text(document.meta.title)
194 for child in document.children:
196 self.add_toc_entry(None, child.meta.title, 0)
197 self.build_document(child)
199 self.shift_toc_base()
202 def add_title_page(self):
204 html.title.text = "Strona tytułowa"
205 bt = etree.SubElement(html.body, 'div', **{'id': 'book-text'})
206 tp = etree.SubElement(bt, 'div', **{'class': 'title-page'})
208 # Tak jak jest teraz – czy może być jednocześnie
210 # i „dzieło nadrzędne”
211 # wcześniej mogło być dzieło nadrzędne,
213 e = self.document.tree.find('//autor_utworu')
215 etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text()
216 e = self.document.tree.find('//nazwa_utworu')
218 etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text()
221 for author in self.document.meta.authors:
222 etree.SubElement(tp, 'h2', **{'class': 'author'}).text = author.readable()
223 etree.SubElement(tp, 'h1', **{'class': 'title'}).text = self.document.meta.title
225 # <xsl:apply-templates select="//nazwa_utworu | //podtytul | //dzielo_nadrzedne" mode="poczatek"/>
227 # <xsl:apply-templates select="//dc:creator" mode="poczatek"/>
228 # <xsl:apply-templates select="//dc:title | //podtytul | //dzielo_nadrzedne" mode="poczatek"/>
230 etree.SubElement(tp, 'p', **{"class": "info"}).text = '\u00a0'
232 if self.document.meta.translators:
233 p = etree.SubElement(tp, 'p', **{'class': 'info'})
234 p.text = 'tłum. ' + ', '.join(t.readable() for t in self.document.meta.translators)
236 #<p class="info">[Kopia robocza]</p>
238 p = etree.XML("""<p class="info">
239 <a>Ta lektura</a>, podobnie jak tysiące innych, jest dostępna on-line na stronie
240 <a href="http://www.wolnelektury.pl/">wolnelektury.pl</a>.
242 p[0].attrib['href'] = str(self.document.meta.url)
245 if self.document.meta.thanks:
246 etree.SubElement(tp, 'p', **{'class': 'info'}).text = self.document.meta.thanks
248 tp.append(etree.XML("""
250 Utwór opracowany został w ramach projektu<a href="http://www.wolnelektury.pl/"> Wolne Lektury</a> przez<a href="http://www.nowoczesnapolska.org.pl/"> fundację Nowoczesna Polska</a>.
254 if self.document.meta.isbn_epub:
255 etree.SubElement(tp, 'p', **{"class": "info"}).text = self.document.meta.isbn_epub
257 tp.append(etree.XML("""<p class="footer info">
258 <a href="http://www.wolnelektury.pl/"><img src="logo_wolnelektury.png" alt="WolneLektury.pl" /></a>
263 file_name='title.xhtml',
265 toc='Strona tytułowa' # TODO: i18n
269 get_resource('res/wl-logo-small.png'),
270 file_name='logo_wolnelektury.png',
271 media_type='image/png'
274 def set_metadata(self):
275 self.output.set_identifier(
276 str(self.document.meta.url))
277 self.output.set_language(
278 functions.lang_code_3to2(self.document.meta.language)
280 self.output.set_title(self.document.meta.title)
282 for i, author in enumerate(self.document.meta.authors):
283 self.output.add_author(
285 file_as=six.text_type(author),
286 uid='creator{}'.format(i)
288 for translator in self.document.meta.translators:
289 self.output.add_author(
290 translator.readable(),
291 file_as=six.text_type(translator),
293 uid='translator{}'.format(i)
295 for publisher in self.document.meta.publisher:
296 self.output.add_metadata("DC", "publisher", publisher)
298 self.output.add_metadata("DC", "date", self.document.meta.created_at)
304 item = epub.EpubNav()
305 self.output.add_item(item)
306 self.output.spine.append(item)
307 self.output.add_item(epub.EpubNcx())
309 self.output.toc.append(
319 def add_support_page(self):
321 get_resource('epub/support.xhtml'),
323 toc='Wesprzyj Wolne Lektury'
327 get_resource('res/jedenprocent.png'),
328 media_type='image/png'
331 get_resource('epub/style.css'),
332 media_type='text/css'
336 def add_file(self, path=None, content=None,
337 media_type='application/xhtml+xml',
338 file_name=None, uid=None,
339 spine=False, toc=None):
342 # jakieś tam ścieśnianie białych znaków?
345 with open(path, 'rb') as f:
347 if file_name is None:
348 file_name = path.rsplit('/', 1)[-1]
351 uid = file_name.split('.', 1)[0]
353 item = epub.EpubItem(
356 media_type=media_type,
360 self.output.add_item(item)
363 self.output.spine.append(item)
365 self.output.spine.insert(spine, item)
368 self.output.toc.append(
376 def add_html(self, html_tree, **kwargs):
377 html = etree.tostring(
378 html_tree, pretty_print=True, xml_declaration=True,
380 doctype='<!DOCTYPE html>'
383 html = librarian.epub.squeeze_whitespace(html)
392 for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
393 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
396 get_resource('fonts/' + fname),
400 media_type='font/ttf'
403 def start_chunk(self):
404 if getattr(self, 'current_chunk', None) is not None:
405 if not len(self.current_chunk):
408 self.current_chunk = etree.Element(
412 self.cursors[None] = self.current_chunk
413 self.current_cursors.append(self.current_chunk)
415 self.section_number = 0
418 def close_chunk(self):
419 assert self.cursor is self.current_chunk
420 ###### -- what if we're inside?
427 self.chunk_counter = chunk_no + 1
430 html.body.append(self.current_chunk)
433 ## html container from template.
436 file_name='part%d.xhtml' % chunk_no,
440 self.current_chunk = None
441 self.current_cursors.pop()
443 def start_element(self, tag, attr):
444 self.current_cursors.append(
445 etree.SubElement(self.cursor, tag, **attr)
def end_element(self):
    """Close the current element by dropping the top of the cursor stack.

    Raises ``IndexError`` when ``self.current_cursors`` is empty.
    """
    stack = self.current_cursors
    stack.pop()
451 def push_text(self, text):
452 self.chars.update(text)
454 self.cursor[-1].tail = (self.cursor[-1].tail or '') + text
456 self.cursor.text = (self.cursor.text or '') + text
459 def assign_image_number(self):
460 image_number = getattr(self, 'image_number', 0)
461 self.image_number = image_number + 1
464 def assign_footnote_number(self):
465 number = getattr(self, 'footnote_number', 1)
466 self.footnote_number = number + 1
469 def assign_section_number(self):
470 number = getattr(self, 'section_number', 1)
471 self.section_number = number + 1
474 def assign_mathml_number(self):
475 number = getattr(self, 'mathml_number', 0)
476 self.mathml_number = number + 1
480 def add_toc_entry(self, fragment, name, precedence):
482 while self.toc_precedences and self.toc_precedences[-1] >= precedence:
483 self.toc_precedences.pop()
485 self.toc_precedences = []
487 real_level = self.toc_base + len(self.toc_precedences)
489 self.toc_precedences.append(precedence)
493 part_number = getattr(
498 filename = 'part%d.xhtml' % part_number
499 uid = filename.split('.')[0]
501 filename += '#' + fragment
502 uid += '-' + fragment
504 toc = self.output.toc
505 for l in range(1, real_level):
506 if isinstance(toc[-1], epub.Link):
507 toc[-1] = [toc[-1], []]
518 def shift_toc_base(self):
522 def add_last_page(self):
524 m = self.document.meta
526 html.title.text = 'Strona redakcyjna'
527 d = etree.SubElement(html.body, 'div', id='book-text')
529 newp = lambda: etree.SubElement(d, 'p', {'class': 'info'})
534 Ten utwór jest udostępniony na licencji
536 etree.SubElement(p, 'a', href=m.license).text = m.license_description
539 Ten utwór nie jest objęty majątkowym prawem autorskim i znajduje się w domenie
540 publicznej, co oznacza że możesz go swobodnie wykorzystywać, publikować
541 i rozpowszechniać. Jeśli utwór opatrzony jest dodatkowymi materiałami
542 (przypisy, motywy literackie etc.), które podlegają prawu autorskiemu, to
543 te dodatkowe materiały udostępnione są na licencji
545 a = etree.SubElement(p, "a", href="http://creativecommons.org/licenses/by-sa/3.0/")
546 a.text = """Creative Commons
547 Uznanie Autorstwa – Na Tych Samych Warunkach 3.0 PL"""
554 p, 'a', href=str(m.url),
556 ', '.join(p.readable() for p in m.authors),
562 newp().text = 'Tekst opracowany na podstawie: ' + m.source_name
566 """ + ", ".join(p for p in m.publisher)
569 newp().text = m.description
573 newp().text = 'Opracowanie redakcyjne i przypisy: %s.' % (
574 ', '.join(e.readable() for e in sorted(self.document.editors())))
577 etree.SubElement(d, 'p', {'class': 'minor-info'}).text = '''Publikację wsparli i wsparły:
578 %s.''' % (', '.join(m.funders))
582 p.text = 'Okładka na podstawie: '
593 newp().text = m.isbn_epub
595 newp().text = '\u00a0'
598 p.attrib['class'] = 'minor-info'
600 Plik wygenerowany dnia '''
601 span = etree.SubElement(p, 'span', id='file_date')
602 span.text = str(date.today())
608 file_name='last.xhtml',
609 toc='Strona redakcyjna',
614 def add_annotations(self):
615 if not len(self.footnotes):
619 html.title.text = 'Przypisy'
620 d = etree.SubElement(
635 d.extend(self.footnotes)
639 file_name='annotations.xhtml',
645 # TODO: allow other covers
647 cover_maker = make_cover
649 cover_file = six.BytesIO()
650 cover = cover_maker(self.document.meta)
651 cover.save(cover_file)
652 cover_name = 'cover.%s' % cover.ext()
654 self.output.set_cover(
655 file_name=cover_name,
656 content=cover_file.getvalue(),
659 ci = ('''<?xml version="1.0" encoding="UTF-8"?>
661 <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en" xml:lang="en">
663 <title>Okładka</title>
665 body { margin: 0em; padding: 0em; }
666 img { width: 100%%; }
670 <img src="cover.%s" alt="Okładka" />
672 </html>''' % cover.ext()).encode('utf-8')
673 self.add_file(file_name='cover.xhtml', content=ci)
675 self.output.spine.append(('cover', 'no'))
676 self.output.guide.append({
678 'href': 'cover.xhtml',
682 def mathml(self, element):
683 name = "math%d.png" % self.assign_mathml_number()
685 content=MathML(element).to_latex().to_png().data,
686 media_type='image/png',