1 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
4 from datetime import date
9 from ebooklib import epub
10 from lxml import etree
11 from librarian import functions, OutputFile, get_resource, XHTMLNS
12 from librarian.cover import make_cover
13 from librarian.embeds.mathml import MathML
14 from librarian.fonts import strip_font
19 self.element = etree.XML('''<html xmlns="http://www.w3.org/1999/xhtml"><head><link rel="stylesheet" href="style.css" type="text/css"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>WolneLektury.pl</title></head><body/></html>''')
23 return self.element.find('.//' + XHTMLNS('title'))
27 return self.element.find('.//' + XHTMLNS('body'))
# Initialise the builder state.
#   base_url    -- stored in self._base_url; a falsy value is replaced by the
#                  hard-coded file:// path below.
#   fundraising -- stored unchanged in self.fundraising.
#   cover       -- stored in self.make_cover; a falsy value is replaced by the
#                  imported make_cover.
33 def __init__(self, base_url=None, fundraising=None, cover=None):
# NOTE(review): the fallback is an absolute path on a developer machine, and
# because of the `or`, self._base_url is never None after this line -- confirm
# whether a plain None default was intended.
34 self._base_url = base_url or 'file:///home/rczajka/for/fnp/librarian/temp~/maly/img/'
35 self.fundraising = fundraising
# Fresh <div id="footnotes"> element; it is also registered under the
# 'footnotes' key below.
36 self.footnotes = etree.Element('div', id='footnotes')
37 self.make_cover = cover or make_cover
# The line that opens the mapping holding the following entries is not
# visible in this excerpt (presumably self.cursors -- TODO confirm).
41 # 'header': self.header,
42 'footnotes': self.footnotes,
# Starts out empty; the fragment/element methods append to and pop from it.
44 self.current_cursors = []
50 return self.current_cursors[-1]
def enter_fragment(self, fragment):
    """Make a registered fragment the active cursor.

    Looks `fragment` up in ``self.cursors`` and pushes the value found there
    onto the ``self.current_cursors`` stack.  An unknown fragment name
    raises ``KeyError``.
    """
    target = self.cursors[fragment]
    self.current_cursors.append(target)
def exit_fragment(self):
    """Leave the current fragment: drop the top of ``self.current_cursors``."""
    del self.current_cursors[-1]
def create_fragment(self, name, element):
    """Register `element` in ``self.cursors`` under the key `name`.

    The name must not be registered already; a duplicate trips the
    assertion and leaves the existing entry untouched.
    """
    registry = self.cursors
    assert name not in registry
    registry[name] = element
def forget_fragment(self, name):
    """Remove the entry `name` from ``self.cursors``.

    Raises ``KeyError`` when no such entry exists.
    """
    self.cursors.pop(name)
67 if self._base_url is not None:
70 return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)
73 # Base URL should be on Document level, not builder.
def build(self, document, **kwargs):
    """Produce the output for `document`; meant to return an OutputFile.

    This implementation is only a placeholder and always raises
    ``NotImplementedError``.
    """
    raise NotImplementedError
79 class EpubBuilder(Builder):
80 build_method_fn = 'epub_build'
81 file_extension = 'epub'
82 isbn_field = 'isbn_epub'
# Accepts a keyword-only `debug` flag on top of whatever the parent
# constructor takes; the remaining arguments are forwarded unchanged.
# What is done with `debug` is on lines not visible in this excerpt.
85 def __init__(self, *args, debug=False, **kwargs):
90 super().__init__(*args, **kwargs)
# Build an EPUB for `document` with ebooklib and return it as an OutputFile
# created from the name of a temporary file.
# Visible steps, in order: title page, a guide entry pointing at part1.xhtml,
# the document body, annotations, support page, optional fundraising inserts,
# writing the file.  Several lines of this method are not visible here.
92 def build(self, document, **kwargs):
93 # replace_characters -- no, we do that at the element level
95 # hyphenator (\00ad in the appropriate places) -- if at all, this should also happen at the element level
96 # conjunctions (\u00a0 after them) -- if at all, then at the element level
97 # trick for hyphens: ­⁠-
100 # the beginning from EACH xml FILE
102 # we count the set of characters used
105 # we used to have a less-advertising flag, used only for Prestigio; we no longer use it.
107 # @editors = document.editors() (as str)
108 # @funders = join(meta.funders)
109 # @thanks = meta.thanks
# The book object is kept both on the instance and in the local `output`.
112 self.output = output = epub.EpubBook()
113 self.document = document
119 self.add_title_page()
128 'Początek utworu', # i18n
131 self.output.guide.append({
134 "href": "part1.xhtml"
138 self.build_document(self.document)
143 self.add_annotations()
144 self.add_support_page()
# Fundraising inserts: one extra XHTML file per entry of self.fundraising,
# inserted into the spine at a computed index.
# NOTE(review): self.fundraising defaults to None in the visible
# Builder.__init__; any guard before len() is on a line not visible here.
148 e = len(self.output.spine) - 3 - 3
149 nfunds = len(self.fundraising)
153 # COUNTING CHARACTERS?
154 for f in range(nfunds):
# Index grows with f: a base of 4, a share of `e` proportional to f/nfunds,
# plus f itself (presumably to account for inserts already added -- confirm).
155 spine_index = int(4 + (f / nfunds * e) + f)
# NOTE(review): f < nfunds == len(self.fundraising), so the modulo below is a
# no-op.  The entry is concatenated into markup and parsed by etree.XML, so it
# has to be a well-formed XML snippet.
159 etree.XML('<div id="book-text"><div class="fundraising">' + self.fundraising[f % len(self.fundraising)] + '</div></div>')
161 self.add_html(h.element, file_name='fund%d.xhtml' % f, spine=spine_index)
# The remaining NamedTemporaryFile arguments are not visible in this excerpt.
165 output_file = tempfile.NamedTemporaryFile(
166 prefix='librarian', suffix='.epub',
169 epub.write_epub(output_file.name, output, {'epub3_landmark': False})
170 return OutputFile.from_filename(output_file.name)
# Emit the content of `document` and, recursively, of its children.
# Parts of the control flow are on lines not visible in this excerpt.
172 def build_document(self, document):
# Start with an empty precedence stack (see add_toc_entry).
173 self.toc_precedences = []
# The root element of the document tree renders itself through this builder.
178 document.tree.getroot().epub_build(self)
179 if document.meta.parts:
# Title block: a div.title-page holding an h1.title with the document title.
182 self.start_element('div', {'class': 'title-page'})
183 self.start_element('h1', {'class': 'title'})
184 self.push_text(document.meta.title)
# Each child gets a TOC entry (no fragment, precedence 0) and is then built
# by a recursive call.
198 for child in document.children:
200 self.add_toc_entry(None, child.meta.title, 0)
201 self.build_document(child)
203 self.shift_toc_base()
206 def add_title_page(self):
208 html.title.text = "Strona tytułowa"
209 bt = etree.SubElement(html.body, 'div', **{'id': 'book-text'})
210 tp = etree.SubElement(bt, 'div', **{'class': 'title-page'})
212 # Tak jak jest teraz – czy może być jednocześnie
214 # i „dzieło nadrzędne”
215 # wcześniej mogło być dzieło nadrzędne,
217 e = self.document.tree.find('//autor_utworu')
219 etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text(self)
220 e = self.document.tree.find('//nazwa_utworu')
222 etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text(self)
225 for author in self.document.meta.authors:
226 etree.SubElement(tp, 'h2', **{'class': 'author'}).text = author.readable()
227 etree.SubElement(tp, 'h1', **{'class': 'title'}).text = self.document.meta.title
229 # <xsl:apply-templates select="//nazwa_utworu | //podtytul | //dzielo_nadrzedne" mode="poczatek"/>
231 # <xsl:apply-templates select="//dc:creator" mode="poczatek"/>
232 # <xsl:apply-templates select="//dc:title | //podtytul | //dzielo_nadrzedne" mode="poczatek"/>
234 etree.SubElement(tp, 'p', **{"class": "info"}).text = '\u00a0'
236 if self.document.meta.translators:
237 p = etree.SubElement(tp, 'p', **{'class': 'info'})
238 p.text = 'tłum. ' + ', '.join(t.readable() for t in self.document.meta.translators)
240 #<p class="info">[Kopia robocza]</p>
242 p = etree.XML("""<p class="info">
243 <a>Ta lektura</a>, podobnie jak tysiące innych, jest dostępna on-line na stronie
244 <a href="https://wolnelektury.pl/">wolnelektury.pl</a>.
246 p[0].attrib['href'] = str(self.document.meta.url)
249 if self.document.meta.thanks:
250 etree.SubElement(tp, 'p', **{'class': 'info'}).text = self.document.meta.thanks
252 tp.append(etree.XML("""
254 Utwór opracowany został w ramach projektu<a href="https://wolnelektury.pl/"> Wolne Lektury</a> przez<a href="https://fundacja.wolnelektury.pl/"> fundację Wolne Lektury</a>.
258 if getattr(self.document.meta, self.isbn_field):
259 etree.SubElement(tp, 'p', **{"class": "info"}).text = getattr(self.document.meta, self.isbn_field)
261 tp.append(etree.XML("""<p class="footer info">
262 <a href="https://wolnelektury.pl/"><img src="logo_wolnelektury.png" alt="WolneLektury.pl" /></a>
267 file_name='title.xhtml',
269 toc='Strona tytułowa' # TODO: i18n
273 get_resource('res/wl-logo-small.png'),
274 file_name='logo_wolnelektury.png',
275 media_type='image/png'
# Copy the document metadata onto the ebooklib book in self.output:
# identifier (the document URL as a string), language (converted with
# functions.lang_code_3to2), title, authors, translators, publishers and the
# creation date.  Some argument lines are not visible in this excerpt.
278 def set_metadata(self):
279 self.output.set_identifier(
280 str(self.document.meta.url))
281 self.output.set_language(
282 functions.lang_code_3to2(self.document.meta.language)
284 self.output.set_title(self.document.meta.title)
# Every author gets its own uid: creator0, creator1, ...
286 for i, author in enumerate(self.document.meta.authors):
287 self.output.add_author(
290 uid='creator{}'.format(i)
# NOTE(review): this loop does not bind `i`.  As far as this excerpt shows,
# the uid below reuses the value left behind by the authors loop, so all
# translators share one uid, and `i` is unbound when there are no authors.
# Probably wants enumerate() here as well -- confirm.
292 for translator in self.document.meta.translators:
293 self.output.add_author(
294 translator.readable(),
295 file_as=str(translator),
297 uid='translator{}'.format(i)
# One DC:publisher entry per publisher.
299 for publisher in self.document.meta.publisher:
300 self.output.add_metadata("DC", "publisher", publisher)
302 self.output.add_metadata("DC", "date", self.document.meta.created_at)
308 item = epub.EpubNav()
309 item.add_link(href='style.css', rel='stylesheet', type='text/css')
310 self.output.add_item(item)
311 self.output.spine.append(item)
312 self.output.add_item(epub.EpubNcx())
314 self.output.toc.append(
# References three bundled resources: the support page (TOC label
# 'Wesprzyj Wolne Lektury'), a PNG image and the stylesheet.
# The calls that receive these arguments are on lines not visible in this
# excerpt.
324 def add_support_page(self):
326 get_resource('res/epub/support.xhtml'),
328 toc='Wesprzyj Wolne Lektury'
332 get_resource('res/jedenprocent.png'),
333 media_type='image/png'
336 get_resource('res/epub/style.css'),
337 media_type='text/css'
# Add one file to the book, either read from `path` or given as `content`.
#   media_type -- defaults to XHTML.
#   file_name  -- defaults to the last '/'-separated component of `path`.
#   uid        -- derived from file_name (the part before the first '.') on a
#                 line whose condition is not visible here.
#   spine      -- the item is either appended to the spine or inserted at
#                 index `spine`; the conditions are not visible here.
#   toc        -- used for a TOC entry; details not visible here.
341 def add_file(self, path=None, content=None,
342 media_type='application/xhtml+xml',
343 file_name=None, uid=None,
344 spine=False, toc=None):
347 # some kind of whitespace squeezing?
350 with open(path, 'rb') as f:
352 if file_name is None:
# NOTE(review): splitting on '/' assumes POSIX-style paths.
353 file_name = path.rsplit('/', 1)[-1]
356 uid = file_name.split('.', 1)[0]
358 item = epub.EpubItem(
361 media_type=media_type,
365 self.output.add_item(item)
368 self.output.spine.append(item)
370 self.output.spine.insert(spine, item)
373 self.output.toc.append(
# Serialise `html_tree` with etree.tostring: pretty-printed, with an XML
# declaration and an HTML5 doctype.  What happens to the result and to
# **kwargs is on lines not visible in this excerpt (presumably passed on to
# add_file -- TODO confirm).
381 def add_html(self, html_tree, **kwargs):
382 html = etree.tostring(
383 html_tree, pretty_print=True, xml_declaration=True,
385 doctype='<!DOCTYPE html>'
395 for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
396 'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
399 get_resource('fonts/' + fname),
403 media_type='font/ttf'
# Begin a new chunk element and make it the active cursor.
406 def start_chunk(self):
# A chunk may already exist; what is done with it (and with an empty one) is
# on lines not visible in this excerpt.
407 if getattr(self, 'current_chunk', None) is not None:
408 if not len(self.current_chunk):
411 self.current_chunk = etree.Element(
# The new chunk is registered under the key None and pushed on the cursor stack.
415 self.cursors[None] = self.current_chunk
416 self.current_cursors.append(self.current_chunk)
# Section numbering restarts with every chunk.
418 self.section_number = 0
# Finish the current chunk: it is appended to an HTML body and referenced by
# the file name part<N>.xhtml; afterwards the chunk is cleared and its cursor
# popped.  The lines that create `html` and `chunk_no` are not visible here.
421 def close_chunk(self):
# Closing is only valid while the chunk itself is the active cursor.
422 assert self.cursor is self.current_chunk
423 ###### -- what if we're inside?
430 self.chunk_counter = chunk_no + 1
433 html.body.append(self.current_chunk)
436 ## html container from template.
439 file_name='part%d.xhtml' % chunk_no,
443 self.current_chunk = None
444 self.current_cursors.pop()
def start_element(self, tag, attr):
    """Create a `tag` child of the current cursor and push it as a cursor.

    `attr` is a mapping of attribute names to values; it is expanded into
    keyword arguments of ``etree.SubElement``.
    """
    child = etree.SubElement(self.cursor, tag, **attr)
    self.current_cursors.append(child)
def end_element(self):
    """Close the current element by removing the top of the cursor stack."""
    stack = self.current_cursors
    stack.pop()
# Append `text` at the current cursor position and record it in self.chars.
454 def push_text(self, text):
455 self.chars.update(text)
# Two targets are visible: the tail of the cursor's last child, or the
# cursor's own text.  The condition choosing between them is on lines not
# visible here (presumably whether the cursor has children -- TODO confirm).
# `or ''` covers a tail/text that is still None.
457 self.cursor[-1].tail = (self.cursor[-1].tail or '') + text
459 self.cursor.text = (self.cursor.text or '') + text
# Advance the per-builder image counter: reads self.image_number (0 when it
# was never set) and stores that value plus one.  The return statement is not
# visible in this excerpt.
462 def assign_image_number(self):
463 image_number = getattr(self, 'image_number', 0)
464 self.image_number = image_number + 1
# Advance the footnote counter: reads self.footnote_number (1 when it was
# never set) and stores that value plus one.  The return statement is not
# visible in this excerpt.
467 def assign_footnote_number(self):
468 number = getattr(self, 'footnote_number', 1)
469 self.footnote_number = number + 1
# Advance the section counter: reads self.section_number and stores that
# value plus one.  The default of 1 applies only while the attribute is
# unset; start_chunk sets it to 0.  The return statement is not visible in
# this excerpt.
472 def assign_section_number(self):
473 number = getattr(self, 'section_number', 1)
474 self.section_number = number + 1
# Advance the MathML image counter: reads self.mathml_number (0 when it was
# never set) and stores that value plus one.  The return statement is not
# visible in this excerpt; mathml() formats the result into a file name.
477 def assign_mathml_number(self):
478 number = getattr(self, 'mathml_number', 0)
479 self.mathml_number = number + 1
# Add a table-of-contents entry called `name`, nested according to
# `precedence`.  Several conditions and the final append are on lines not
# visible in this excerpt.
483 def add_toc_entry(self, fragment, name, precedence):
# Drop every stacked precedence that is not strictly lower than the new one.
485 while self.toc_precedences and self.toc_precedences[-1] >= precedence:
486 self.toc_precedences.pop()
# The condition guarding this reset is not visible here.
488 self.toc_precedences = []
# Nesting level = base offset + number of precedences still on the stack.
490 real_level = self.toc_base + len(self.toc_precedences)
492 self.toc_precedences.append(precedence)
496 part_number = getattr(
# Link target and uid are derived from the part file name; a fragment is
# appended as '#fragment' / '-fragment' (the guarding condition is not visible).
501 filename = 'part%d.xhtml' % part_number
502 uid = filename.split('.')[0]
504 filename += '#' + fragment
505 uid += '-' + fragment
# Walk down the nested TOC; a trailing plain Link is turned into a
# [link, children] pair so it can take sub-entries.
507 toc = self.output.toc
508 for l in range(1, real_level):
509 if isinstance(toc[-1], epub.Link):
510 toc[-1] = [toc[-1], []]
521 def shift_toc_base(self):
525 def add_last_page(self):
527 m = self.document.meta
529 html.title.text = 'Strona redakcyjna'
530 d = etree.SubElement(html.body, 'div', id='book-text')
532 newp = lambda: etree.SubElement(d, 'p', {'class': 'info'})
536 "Wszystkie zasoby Wolnych Lektur możesz swobodnie wykorzystywać, "
537 "publikować i rozpowszechniać pod warunkiem zachowania warunków "
538 "licencji i zgodnie z "
540 a = etree.SubElement(p, "a", href="https://wolnelektury.pl/info/zasady-wykorzystania/")
541 a.text = "Zasadami wykorzystania Wolnych Lektur"
544 etree.SubElement(p, "br")
548 p[-1].tail = "Ten utwór jest udostępniony na licencji "
549 etree.SubElement(p, 'a', href=m.license).text = m.license_description
551 p[-1].tail = 'Ten utwór jest w domenie publicznej.'
553 etree.SubElement(p, "br")
556 "Wszystkie materiały dodatkowe (przypisy, motywy literackie) są "
559 etree.SubElement(p, 'a', href='https://artlibre.org/licence/lal/pl/').text = 'Licencji Wolnej Sztuki 1.3'
561 etree.SubElement(p, "br")
563 "Fundacja Wolne Lektury zastrzega sobie prawa do wydania "
564 "krytycznego zgodnie z art. Art.99(2) Ustawy o prawach autorskich "
565 "i prawach pokrewnych. Wykorzystując zasoby z Wolnych Lektur, "
566 "należy pamiętać o zapisach licencji oraz zasadach, które "
570 etree.SubElement(p, 'a', href='https://wolnelektury.pl/info/zasady-wykorzystania/').text = 'Zasadach wykorzystania Wolnych Lektur'
571 p[-1].tail = '. Zapoznaj się z nimi, zanim udostępnisz dalej nasze książki.'
574 p.text = 'E-book można pobrać ze strony: '
576 p, 'a', href=str(m.url),
578 ', '.join(p.readable() for p in m.authors),
584 newp().text = 'Tekst opracowany na podstawie: ' + m.source_name
588 """ + ", ".join(p for p in m.publisher)
591 newp().text = m.description
594 editors = self.document.editors()
596 newp().text = 'Opracowanie redakcyjne i przypisy: %s.' % (
597 ', '.join(e.readable() for e in sorted(editors))
601 etree.SubElement(d, 'p', {'class': 'minor-info'}).text = '''Publikację wsparli i wsparły:
602 %s.''' % (', '.join(m.funders))
606 p.text = 'Okładka na podstawie: '
616 if getattr(m, self.isbn_field):
617 newp().text = getattr(m, self.isbn_field)
619 newp().text = '\u00a0'
622 p.attrib['class'] = 'minor-info'
624 Plik wygenerowany dnia '''
625 span = etree.SubElement(p, 'span', id='file_date')
626 span.text = str(date.today())
632 file_name='last.xhtml',
633 toc='Strona redakcyjna',
# Emit the footnotes collected in self.footnotes as annotations.xhtml.
638 def add_annotations(self):
# Nothing collected: what happens then is on a line not visible here
# (presumably an early return -- TODO confirm).
639 if not len(self.footnotes):
# Page title 'Przypisy' is a runtime string and stays as is.
643 html.title.text = 'Przypisy'
644 d = etree.SubElement(
# The children of the footnotes container are added to `d`.
659 d.extend(self.footnotes)
663 file_name='annotations.xhtml',
669 # TODO: allow other covers
671 cover_maker = self.make_cover
673 cover_file = io.BytesIO()
674 cover = cover_maker(self.document.meta, width=600)
675 cover.save(cover_file)
676 cover_name = 'cover.%s' % cover.ext()
678 self.output.set_cover(
679 file_name=cover_name,
680 content=cover_file.getvalue(),
683 ci = ('''<?xml version="1.0" encoding="UTF-8"?>
685 <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en" xml:lang="en">
687 <title>Okładka</title>
689 body { margin: 0em; padding: 0em; }
690 img { width: 100%%; }
694 <img src="cover.%s" alt="Okładka" />
696 </html>''' % cover.ext()).encode('utf-8')
697 self.add_file(file_name='cover.xhtml', content=ci)
699 self.output.spine.append(('cover', 'no'))
700 self.output.guide.append({
702 'href': 'cover.xhtml',
# Render a MathML element as a PNG image.  The file name is math<N>.png with
# N taken from assign_mathml_number(); the bytes come from converting the
# element to LaTeX and then to PNG.  The call receiving these arguments and
# the return value are on lines not visible in this excerpt.
706 def mathml(self, element):
707 name = "math%d.png" % self.assign_mathml_number()
709 content=MathML(element).to_latex().to_png().data,
710 media_type='image/png',
715 def process_comment(self, comment):
716 m = re.match(r'TRIM:(\d+)', comment.text)
718 self.splits.append(comment.sourceline - int(m.group(1)))