src/librarian/builders/epub.py

   1 from datetime import date
   2 import os
   3 import tempfile
   4 from ebooklib import epub
   5 from lxml import etree
   6 import six
   7 from librarian import functions, OutputFile, get_resource, XHTMLNS
   8 from librarian.cover import make_cover
   9 from librarian.embeds.mathml import MathML
  10 import librarian.epub
  11 from librarian.fonts import strip_font
  12
  13
  14
  15
  16 class Xhtml:
  17     def __init__(self):
  18         self.element = etree.XML('''<html xmlns="http://www.w3.org/1999/xhtml"><head><link rel="stylesheet" href="style.css" type="text/css"/><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>WolneLektury.pl</title></head><body/></html>''')
  19
  20     @property
  21     def title(self):
  22         return self.element.find('.//' + XHTMLNS('title'))
  23
  24     @property
  25     def body(self):
  26         return self.element.find('.//' + XHTMLNS('body'))
  27
  28
  29 class Builder:
  30     file_extension = None
  31
  32     def __init__(self, base_url=None, fundraising=None, cover=None):
  33         self._base_url = base_url or 'file:///home/rczajka/for/fnp/librarian/temp~/maly/img/'
  34         self.fundraising = fundraising
  35         self.footnotes = etree.Element('div', id='footnotes')
  36         self.make_cover = cover or make_cover
  37
  38         self.cursors = {
  39 #            None: None,
  40 #            'header': self.header,
  41             'footnotes': self.footnotes,
  42         }
  43         self.current_cursors = []
  44
  45         self.toc_base = 0
  46
  47     @property
  48     def cursor(self):
  49         return self.current_cursors[-1]
  50
  51     def enter_fragment(self, fragment):
  52         self.current_cursors.append(self.cursors[fragment])
  53
  54     def exit_fragment(self):
  55         self.current_cursors.pop()
  56
  57     def create_fragment(self, name, element):
  58         assert name not in self.cursors
  59         self.cursors[name] = element
  60
  61     def forget_fragment(self, name):
  62         del self.cursors[name]
  63
  64
  65
  66     @property
  67     def base_url(self):
  68         if self._base_url is not None:
  69             return self._base_url
  70         else:
  71             return 'https://wolnelektury.pl/media/book/pictures/{}/'.format(self.document.meta.url.slug)
  72
  73
  74     # Base URL should be on Document level, not builder.
  75     def build(self, document, **kwargs):
  76         """Should return an OutputFile with the output."""
  77         raise NotImplementedError()
  78
  79
  80 class EpubBuilder(Builder):
  81     file_extension = 'epub'
  82     isbn_field = 'isbn_epub'
  83
  84     def __init__(self, *args, **kwargs):
  85         self.chars = set()
  86         self.fundr = 0
  87         super().__init__(*args, **kwargs)
  88
  89     def build(self, document, **kwargs):
  90         # replace_characters -- nie, robimy to na poziomie elementów
  91
  92         # hyphenator (\00ad w odp. miejscach) -- jeśli już, to też powinno to się dziać na poziomie elementów
  93         # spójniki (\u00a0 po)-- jeśli już, to na poziomie elementów
  94         # trick na dywizy: &#xad;&#8288;-
  95
  96         # do toc trafia:
  97         #   początek z KAŻDEGO PLIKU xml
  98
  99         # zliczamy zbiór użytych znaków
 100
 101         # flagi:
 102         # mieliśmy taką flagę less-advertising, używaną tylko dla Prestigio; już nie używamy.
 103
 104         # @editors = document.editors() (jako str)
 105         # @funders = join(meta.funders)
 106         # @thanks = meta.thanks
 107
 108
 109         self.output = output = epub.EpubBook()
 110         self.document = document
 111
 112         self.set_metadata()
 113
 114         self.add_cover()
 115
 116         self.add_title_page()
 117         self.add_toc()
 118
 119
 120
 121         self.start_chunk()
 122
 123         self.add_toc_entry(
 124             None,
 125             'Początek utworu', # i18n
 126             0
 127         )
 128         self.output.guide.append({
 129             "type": "text",
 130             "title": "Początek",
 131             "href": "part1.xhtml"
 132         })
 133
 134
 135         self.build_document(self.document)
 136
 137
 138         self.close_chunk()
 139
 140         self.add_annotations()
 141         self.add_support_page()
 142         self.add_last_page()
 143
 144         if self.fundraising:
 145             e = len(self.output.spine) - 3 - 3
 146             nfunds = len(self.fundraising)
 147             if e > 4 * nfunds:
 148                 nfunds *= 2
 149
 150             # COUNTING CHARACTERS?
 151             for f in range(nfunds):
 152                 spine_index = int(4 + (f / nfunds * e) + f)
 153
 154                 h = Xhtml()
 155                 h.body.append(
 156                     etree.XML('<div id="book-text"><div class="fundraising">' + self.fundraising[f % len(self.fundraising)] + '</div></div>')
 157                 )
 158                 self.add_html(h.element, file_name='fund%d.xhtml' % f, spine=spine_index)
 159
 160         self.add_fonts()
 161
 162         output_file = tempfile.NamedTemporaryFile(
 163             prefix='librarian', suffix='.epub',
 164             delete=False)
 165         output_file.close()
 166         epub.write_epub(output_file.name, output, {'epub3_landmark': False})
 167         return OutputFile.from_filename(output_file.name)
 168
 169     def build_document(self, document):
 170         self.toc_precedences = []
 171
 172         self.start_chunk()
 173
 174
 175         document.tree.getroot().epub_build(self)
 176         if document.meta.parts:
 177             self.start_chunk()
 178
 179             self.start_element('div', {'class': 'title-page'})
 180             self.start_element('h1', {'class': 'title'})
 181             self.push_text(document.meta.title)
 182             self.end_element()
 183             self.end_element()
 184
 185             ######
 186             # 160
 187             # translators
 188             # working copy?
 189             # ta lektura
 190             # tanks
 191             # utwor opracowany
 192             # isbn
 193             # logo
 194
 195             for child in document.children:
 196                 self.start_chunk()
 197                 self.add_toc_entry(None, child.meta.title, 0)
 198                 self.build_document(child)
 199
 200         self.shift_toc_base()
 201
 202
 203     def add_title_page(self):
 204         html = Xhtml()
 205         html.title.text = "Strona tytułowa"
 206         bt = etree.SubElement(html.body, 'div', **{'id': 'book-text'})
 207         tp = etree.SubElement(bt, 'div', **{'class': 'title-page'})
 208
 209         # Tak jak jest teraz – czy może być jednocześnie
 210         # no „autor_utworu”
 211         # i „dzieło nadrzędne”
 212         # wcześniej mogło być dzieło nadrzędne,
 213
 214         e = self.document.tree.find('//autor_utworu')
 215         if e is not None:
 216             etree.SubElement(tp, 'h2', **{'class': 'author'}).text = e.raw_printable_text(self)
 217         e = self.document.tree.find('//nazwa_utworu')
 218         if e is not None:
 219             etree.SubElement(tp, 'h1', **{'class': 'title'}).text = e.raw_printable_text(self)
 220
 221         if not len(tp):
 222             for author in self.document.meta.authors:
 223                 etree.SubElement(tp, 'h2', **{'class': 'author'}).text = author.readable()
 224             etree.SubElement(tp, 'h1', **{'class': 'title'}).text = self.document.meta.title
 225
 226 #                <xsl:apply-templates select="//nazwa_utworu | //podtytul | //dzielo_nadrzedne" mode="poczatek"/>
 227 #        else:
 228 #                            <xsl:apply-templates select="//dc:creator" mode="poczatek"/>
 229 #                <xsl:apply-templates select="//dc:title | //podtytul | //dzielo_nadrzedne" mode="poczatek"/>
 230
 231         etree.SubElement(tp, 'p', **{"class": "info"}).text = '\u00a0'
 232
 233         if self.document.meta.translators:
 234             p = etree.SubElement(tp, 'p', **{'class': 'info'})
 235             p.text = 'tłum. ' + ', '.join(t.readable() for t in self.document.meta.translators)
 236
 237         #<p class="info">[Kopia robocza]</p>
 238
 239         p = etree.XML("""<p class="info">
 240               <a>Ta lektura</a>, podobnie jak tysiące innych, jest dostępna on-line na stronie
 241               <a href="http://www.wolnelektury.pl/">wolnelektury.pl</a>.
 242             </p>""")
 243         p[0].attrib['href'] = str(self.document.meta.url)
 244         tp.append(p)
 245
 246         if self.document.meta.thanks:
 247             etree.SubElement(tp, 'p', **{'class': 'info'}).text = self.document.meta.thanks
 248
 249         tp.append(etree.XML("""
 250           <p class="info">
 251             Utwór opracowany został w&#160;ramach projektu<a href="http://www.wolnelektury.pl/"> Wolne Lektury</a> przez<a href="http://www.nowoczesnapolska.org.pl/"> fundację Nowoczesna Polska</a>.
 252           </p>
 253         """))
 254
 255         if getattr(self.document.meta, self.isbn_field):
 256             etree.SubElement(tp, 'p', **{"class": "info"}).text = getattr(self.document.meta, self.isbn_field)
 257
 258         tp.append(etree.XML("""<p class="footer info">
 259             <a href="http://www.wolnelektury.pl/"><img src="logo_wolnelektury.png" alt="WolneLektury.pl" /></a>
 260         </p>"""))
 261
 262         self.add_html(
 263             html.element,
 264             file_name='title.xhtml',
 265             spine=True,
 266             toc='Strona tytułowa' # TODO: i18n
 267         )
 268
 269         self.add_file(
 270             get_resource('res/wl-logo-small.png'),
 271             file_name='logo_wolnelektury.png',
 272             media_type='image/png'
 273         )
 274
 275     def set_metadata(self):
 276         self.output.set_identifier(
 277             str(self.document.meta.url))
 278         self.output.set_language(
 279             functions.lang_code_3to2(self.document.meta.language)
 280         )
 281         self.output.set_title(self.document.meta.title)
 282
 283         for i, author in enumerate(self.document.meta.authors):
 284             self.output.add_author(
 285                 author.readable(),
 286                 file_as=six.text_type(author),
 287                 uid='creator{}'.format(i)
 288             )
 289         for translator in self.document.meta.translators:
 290             self.output.add_author(
 291                 translator.readable(),
 292                 file_as=six.text_type(translator),
 293                 role='trl',
 294                 uid='translator{}'.format(i)
 295             )
 296         for publisher in self.document.meta.publisher:
 297             self.output.add_metadata("DC", "publisher", publisher)
 298
 299         self.output.add_metadata("DC", "date", self.document.meta.created_at)
 300
 301
 302
 303
 304     def add_toc(self):
 305         item = epub.EpubNav()
 306         self.output.add_item(item)
 307         self.output.spine.append(item)
 308         self.output.add_item(epub.EpubNcx())
 309
 310         self.output.toc.append(
 311             epub.Link(
 312                 "nav.xhtml",
 313                 "Spis treści",
 314                 "nav"
 315             )
 316         )
 317
 318
 319
 320     def add_support_page(self):
 321         self.add_file(
 322             get_resource('epub/support.xhtml'),
 323             spine=True,
 324             toc='Wesprzyj Wolne Lektury'
 325         )
 326
 327         self.add_file(
 328             get_resource('res/jedenprocent.png'),
 329             media_type='image/png'
 330         )
 331         self.add_file(
 332             get_resource('epub/style.css'),
 333             media_type='text/css'
 334         )
 335
 336
 337     def add_file(self, path=None, content=None,
 338                  media_type='application/xhtml+xml',
 339                  file_name=None, uid=None,
 340                  spine=False, toc=None):
 341
 342         # update chars?
 343         # jakieś tam ścieśnianie białych znaków?
 344
 345         if content is None:
 346             with open(path, 'rb') as f:
 347                 content = f.read()
 348             if file_name is None:
 349                 file_name = path.rsplit('/', 1)[-1]
 350
 351         if uid is None:
 352             uid = file_name.split('.', 1)[0]
 353
 354         item = epub.EpubItem(
 355             uid=uid,
 356             file_name=file_name,
 357             media_type=media_type,
 358             content=content
 359         )
 360
 361         self.output.add_item(item)
 362         if spine:
 363             if spine is True:
 364                 self.output.spine.append(item)
 365             else:
 366                 self.output.spine.insert(spine, item)
 367
 368         if toc:
 369             self.output.toc.append(
 370                 epub.Link(
 371                     file_name,
 372                     toc,
 373                     uid
 374                 )
 375             )
 376
 377     def add_html(self, html_tree, **kwargs):
 378         html = etree.tostring(
 379             html_tree, pretty_print=True, xml_declaration=True,
 380             encoding="utf-8",
 381             doctype='<!DOCTYPE html>'
 382         )
 383
 384         html = librarian.epub.squeeze_whitespace(html)
 385
 386         self.add_file(
 387             content=html,
 388             **kwargs
 389         )
 390
 391
 392     def add_fonts(self):
 393         for fname in ('DejaVuSerif.ttf', 'DejaVuSerif-Bold.ttf',
 394                       'DejaVuSerif-Italic.ttf', 'DejaVuSerif-BoldItalic.ttf'):
 395             self.add_file(
 396                 content=strip_font(
 397                     get_resource('fonts/' + fname),
 398                     self.chars
 399                 ),
 400                 file_name=fname,
 401                 media_type='font/ttf'
 402             )
 403
 404     def start_chunk(self):
 405         if getattr(self, 'current_chunk', None) is not None:
 406             if not len(self.current_chunk):
 407                 return
 408             self.close_chunk()
 409         self.current_chunk = etree.Element(
 410             'div',
 411             id="book-text"
 412         )
 413         self.cursors[None] = self.current_chunk
 414         self.current_cursors.append(self.current_chunk)
 415
 416         self.section_number = 0
 417
 418
 419     def close_chunk(self):
 420         assert self.cursor is self.current_chunk
 421         ###### -- what if we're inside?
 422
 423         chunk_no = getattr(
 424             self,
 425             'chunk_counter',
 426             1
 427         )
 428         self.chunk_counter = chunk_no + 1
 429
 430         html = Xhtml()
 431         html.body.append(self.current_chunk)
 432
 433         self.add_html(
 434             ## html container from template.
 435             #self.current_chunk,
 436             html.element,
 437             file_name='part%d.xhtml' % chunk_no,
 438             spine=True,
 439
 440         )
 441         self.current_chunk = None
 442         self.current_cursors.pop()
 443
 444     def start_element(self, tag, attr):
 445         self.current_cursors.append(
 446             etree.SubElement(self.cursor, tag, **attr)
 447         )
 448
 449     def end_element(self):
 450         self.current_cursors.pop()
 451
 452     def push_text(self, text):
 453         self.chars.update(text)
 454         if len(self.cursor):
 455             self.cursor[-1].tail = (self.cursor[-1].tail or '') + text
 456         else:
 457             self.cursor.text = (self.cursor.text or '') + text
 458
 459
 460     def assign_image_number(self):
 461         image_number = getattr(self, 'image_number', 0)
 462         self.image_number = image_number + 1
 463         return image_number
 464
 465     def assign_footnote_number(self):
 466         number = getattr(self, 'footnote_number', 1)
 467         self.footnote_number = number + 1
 468         return number
 469
 470     def assign_section_number(self):
 471         number = getattr(self, 'section_number', 1)
 472         self.section_number = number + 1
 473         return number
 474
 475     def assign_mathml_number(self):
 476         number = getattr(self, 'mathml_number', 0)
 477         self.mathml_number = number + 1
 478         return number
 479
 480
 481     def add_toc_entry(self, fragment, name, precedence):
 482         if precedence:
 483             while self.toc_precedences and self.toc_precedences[-1] >= precedence:
 484                 self.toc_precedences.pop()
 485         else:
 486             self.toc_precedences = []
 487
 488         real_level = self.toc_base + len(self.toc_precedences)
 489         if precedence:
 490             self.toc_precedences.append(precedence)
 491         else:
 492             self.toc_base += 1
 493
 494         part_number = getattr(
 495             self,
 496             'chunk_counter',
 497             1
 498         )
 499         filename = 'part%d.xhtml' % part_number
 500         uid = filename.split('.')[0]
 501         if fragment:
 502             filename += '#' + fragment
 503             uid += '-' + fragment
 504
 505         toc = self.output.toc
 506         for l in range(1, real_level):
 507             if isinstance(toc[-1], epub.Link):
 508                 toc[-1] = [toc[-1], []]
 509             toc = toc[-1][1]
 510
 511         toc.append(
 512             epub.Link(
 513                 filename,
 514                 name,
 515                 uid
 516             )
 517         )
 518
 519     def shift_toc_base(self):
 520         self.toc_base -= 1
 521
 522
 523     def add_last_page(self):
 524         html = Xhtml()
 525         m = self.document.meta
 526
 527         html.title.text = 'Strona redakcyjna'
 528         d = etree.SubElement(html.body, 'div', id='book-text')
 529
 530         newp = lambda: etree.SubElement(d, 'p', {'class': 'info'})
 531
 532         p = newp()
 533         p.text = (
 534             "Wszystkie zasoby Wolnych Lektur możesz swobodnie wykorzystywać, "
 535             "publikować i rozpowszechniać pod warunkiem zachowania warunków "
 536             "licencji i zgodnie z "
 537         )
 538         a = etree.SubElement(p, "a", href="https://wolnelektury.pl/info/zasady-wykorzystania/")
 539         a.text = "Zasadami wykorzystania Wolnych Lektur"
 540         a.tail = "."
 541
 542         etree.SubElement(p, "br")
 543
 544
 545         if m.license:
 546             p[-1].tail = "Ten utwór jest udostępniony na licencji "
 547             etree.SubElement(p, 'a', href=m.license).text = m.license_description
 548         else:
 549             p[-1].tail = 'Ten utwór jest w domenie publicznej.'
 550
 551         etree.SubElement(p, "br")
 552
 553         p[-1].tail = (
 554             "Wszystkie materiały dodatkowe (przypisy, motywy literackie) są "
 555             "udostępnione na "
 556             )
 557         etree.SubElement(p, 'a', href='https://artlibre.org/licence/lal/pl/').text = 'Licencji Wolnej Sztuki 1.3'
 558         p[-1].tail = '.'
 559         etree.SubElement(p, "br")
 560         p[-1].tail = (
 561             "Fundacja Nowoczesna Polska zastrzega sobie prawa do wydania "
 562             "krytycznego zgodnie z art. Art.99(2) Ustawy o prawach autorskich "
 563             "i prawach pokrewnych. Wykorzystując zasoby z Wolnych Lektur, "
 564             "należy pamiętać o zapisach licencji oraz zasadach, które "
 565             "spisaliśmy w "
 566         )
 567
 568         etree.SubElement(p, 'a', href='https://wolnelektury.pl/info/zasady-wykorzystania/').text = 'Zasadach wykorzystania Wolnych Lektur'
 569         p[-1].tail = '. Zapoznaj się z nimi, zanim udostępnisz dalej nasze książki.'
 570
 571         p = newp()
 572         p.text = 'E-book można pobrać ze strony: '
 573         etree.SubElement(
 574             p, 'a', href=str(m.url),
 575             title=', '.join((
 576                 ', '.join(p.readable() for p in m.authors),
 577                 m.title
 578             ))
 579         ).text = str(m.url)
 580
 581         if m.source_name:
 582             newp().text = 'Tekst opracowany na podstawie: ' + m.source_name
 583
 584         newp().text = """
 585               Wydawca:
 586               """ + ", ".join(p for p in m.publisher)
 587
 588         if m.description:
 589             newp().text = m.description
 590
 591
 592         if m.editors:
 593             newp().text = 'Opracowanie redakcyjne i przypisy: %s.' % (
 594                 ', '.join(e.readable() for e in sorted(self.document.editors())))
 595
 596         if m.funders:
 597             etree.SubElement(d, 'p', {'class': 'minor-info'}).text = '''Publikację wsparli i wsparły:
 598             %s.''' % (', '.join(m.funders))
 599
 600         if m.cover_by:
 601             p = newp()
 602             p.text = 'Okładka na podstawie: '
 603             if m.cover_source:
 604                 etree.SubElement(
 605                     p,
 606                     'a',
 607                     href=m.cover_source
 608                 ).text = m.cover_by
 609             else:
 610                 p.text += m.cover_by
 611
 612         if getattr(m, self.isbn_field):
 613             newp().text = getattr(m, self.isbn_field)
 614
 615         newp().text = '\u00a0'
 616
 617         p = newp()
 618         p.attrib['class'] = 'minor-info'
 619         p.text = '''
 620               Plik wygenerowany dnia '''
 621         span = etree.SubElement(p, 'span', id='file_date')
 622         span.text = str(date.today())
 623         span.tail = '''.
 624           '''
 625
 626         self.add_html(
 627             html.element,
 628             file_name='last.xhtml',
 629             toc='Strona redakcyjna',
 630             spine=True
 631         )
 632
 633
 634     def add_annotations(self):
 635         if not len(self.footnotes):
 636             return
 637
 638         html = Xhtml()
 639         html.title.text = 'Przypisy'
 640         d = etree.SubElement(
 641             etree.SubElement(
 642                 html.body,
 643                 'div',
 644                 id='book-text'
 645             ),
 646             'div',
 647             id='footnotes'
 648         )
 649
 650         etree.SubElement(
 651             d,
 652             'h2',
 653         ).text = 'Przypisy:'
 654
 655         d.extend(self.footnotes)
 656
 657         self.add_html(
 658             html.element,
 659             file_name='annotations.xhtml',
 660             spine=True,
 661             toc='Przypisy'
 662         )
 663
 664     def add_cover(self):
 665         # TODO: allow other covers
 666
 667         cover_maker = self.make_cover
 668
 669         cover_file = six.BytesIO()
 670         cover = cover_maker(self.document.meta, width=600)
 671         cover.save(cover_file)
 672         cover_name = 'cover.%s' % cover.ext()
 673
 674         self.output.set_cover(
 675             file_name=cover_name,
 676             content=cover_file.getvalue(),
 677             create_page = False
 678         )
 679         ci = ('''<?xml version="1.0" encoding="UTF-8"?>
 680 <!DOCTYPE html>
 681 <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="en" xml:lang="en">
 682  <head>
 683   <title>Okładka</title>
 684   <style>
 685     body { margin: 0em; padding: 0em; }
 686     img { width: 100%%; }
 687   </style>
 688  </head>
 689  <body>
 690    <img src="cover.%s" alt="Okładka" />
 691  </body>
 692 </html>''' % cover.ext()).encode('utf-8')
 693         self.add_file(file_name='cover.xhtml', content=ci)
 694
 695         self.output.spine.append(('cover', 'no'))
 696         self.output.guide.append({
 697             'type': 'cover',
 698             'href': 'cover.xhtml',
 699             'title': 'Okładka'
 700         })
 701
 702     def mathml(self, element):
 703         name = "math%d.png" % self.assign_mathml_number()
 704         self.add_file(
 705             content=MathML(element).to_latex().to_png().data,
 706             media_type='image/png',
 707             file_name=name
 708         )
 709         return name