Some prelim work on builder api.
[librarian.git] / src / librarian / pdf.py
index a51dbb5..de09755 100644 (file)
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
-#
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 #
 """PDF creation library.
 
 #
 """PDF creation library.
 
@@ -9,8 +7,7 @@ Creates one big XML from the book and its children, converts it to LaTeX
 with TeXML, then runs it by XeLaTeX.
 
 """
 with TeXML, then runs it by XeLaTeX.
 
 """
-from __future__ import print_function, unicode_literals
-
+import io
 import os
 import os.path
 import shutil
 import os
 import os.path
 import shutil
@@ -19,11 +16,13 @@ import re
 from copy import deepcopy
 from subprocess import call, PIPE
 from itertools import chain
 from copy import deepcopy
 from subprocess import call, PIPE
 from itertools import chain
+import urllib.parse
+import urllib.request
 
 
+from PIL import Image
 from Texml.processor import process
 from lxml import etree
 from lxml.etree import XMLSyntaxError, XSLTApplyError
 from Texml.processor import process
 from lxml import etree
 from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
 
 from librarian.dcparser import Person
 from librarian.parser import WLDocument
 
 from librarian.dcparser import Person
 from librarian.parser import WLDocument
@@ -189,6 +188,29 @@ def hack_motifs(doc):
                 break
 
 
                 break
 
 
+def add_fundraising(doc, fundraising):
+    """Insert fundraising messages at evenly spread spots in the document.
+
+    An empty ``f_spot`` element is placed before every ``naglowek_czesc``
+    and ``naglowek_akt`` element, plus one more appended to the last child
+    of the root element.  A subset of these spots is then marked with
+    ``active="true"`` and filled with the fundraising content.
+
+    doc: an element tree exposing ``xpath()`` and ``getroot()``;
+        it is modified in place
+    fundraising: a sequence of XML fragment strings; each one is parsed
+        wrapped in an ``<f_spot>`` element, so it may mix text and markup
+    """
+    # Candidate spots: before each naglowek_czesc and naglowek_akt,
+    # and at the end.
+    spots = []
+    for naglowek in doc.xpath('//naglowek_czesc|//naglowek_akt'):
+        spot = etree.Element('f_spot')
+        naglowek.addprevious(spot)
+        spots.append(spot)
+    spot = etree.Element('f_spot')
+    doc.getroot()[-1].append(spot)
+    spots.append(spot)
+
+    e = len(spots)
+    nfunds = len(fundraising)
+    # With more than four spots per message, cycle through the messages twice.
+    if e > 4 * nfunds:
+        nfunds *= 2
+    for f in range(nfunds):
+        # Integer arithmetic: the float form int(f / nfunds * e) can round
+        # down to the previous spot (e.g. 1 / 49 * 49 < 1).
+        spot_index = f * e // nfunds
+        target = spots[spot_index]
+        target.set('active', 'true')
+        elem = etree.fromstring(
+            '<f_spot>' + fundraising[f % len(fundraising)] + '</f_spot>')
+        # NOTE(review): when there are fewer spots than messages, several
+        # messages land on the same spot; its text is then overwritten while
+        # child elements accumulate -- confirm this is intended.
+        target.text = elem.text
+        for c in elem:
+            target.append(c)
+
+
 def parse_creator(doc):
     """Generates readable versions of creator and translator tags.
 
 def parse_creator(doc):
     """Generates readable versions of creator and translator tags.
 
@@ -242,8 +264,8 @@ def package_available(package, args='', verbose=False):
 
 
 def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
 
 
 def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
-              cover=None, flags=None, customizations=None, ilustr_path='',
-              latex_dir=False):
+              cover=None, flags=None, customizations=None, base_url='file://./',
+              latex_dir=False, fundraising=None):
     """ produces a PDF file with XeLaTeX
 
     wldoc: a WLDocument
     """ produces a PDF file with XeLaTeX
 
     wldoc: a WLDocument
@@ -283,21 +305,31 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         elif package_available('morefloats', 'maxfloats=19'):
             root.set('morefloats', 'new')
 
         elif package_available('morefloats', 'maxfloats=19'):
             root.set('morefloats', 'new')
 
+        if customizations is None:
+            customizations = []
+        else:
+            customizations = list(customizations)
+
+        if book_info.endnotes:
+            customizations.append('endnotes')
+
         # add customizations
         if customizations is not None:
         # add customizations
         if customizations is not None:
-            root.set('customizations', u','.join(customizations))
+            root.set('customizations', ','.join(customizations))
 
         # add editors info
         editors = document.editors()
         if editors:
 
         # add editors info
         editors = document.editors()
         if editors:
-            root.set('editors', u', '.join(sorted(
+            root.set('editors', ', '.join(sorted(
                 editor.readable() for editor in editors)))
         if document.book_info.funders:
                 editor.readable() for editor in editors)))
         if document.book_info.funders:
-            root.set('funders', u', '.join(document.book_info.funders))
+            root.set('funders', ', '.join(document.book_info.funders))
         if document.book_info.thanks:
             root.set('thanks', document.book_info.thanks)
 
         # hack the tree
         if document.book_info.thanks:
             root.set('thanks', document.book_info.thanks)
 
         # hack the tree
+        if fundraising:
+            add_fundraising(document.edoc, fundraising)
         move_motifs_inside(document.edoc)
         hack_motifs(document.edoc)
         parse_creator(document.edoc)
         move_motifs_inside(document.edoc)
         hack_motifs(document.edoc)
         parse_creator(document.edoc)
@@ -305,6 +337,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         fix_hanging(document.edoc)
         fix_tables(document.edoc)
         mark_subauthors(document.edoc)
         fix_hanging(document.edoc)
         fix_tables(document.edoc)
         mark_subauthors(document.edoc)
+        document.fix_pa_akap()
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
@@ -314,8 +347,30 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         # TeXML -> LaTeX
         temp = mkdtemp('-wl2pdf')
 
         # TeXML -> LaTeX
         temp = mkdtemp('-wl2pdf')
 
-        for ilustr in document.edoc.findall("//ilustr"):
-            shutil.copy(os.path.join(ilustr_path, ilustr.get("src")), temp)
+        for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
+            url = urllib.parse.urljoin(
+                base_url,
+                ilustr.get('src')
+            )
+            imgfile = urllib.request.urlopen(url)
+            img = Image.open(imgfile)
+
+            th_format, ext, media_type = {
+                'GIF': ('GIF', 'gif', 'image/gif'),
+                'PNG': ('PNG', 'png', 'image/png'),
+            }.get(img.format, ('JPEG', 'jpg', 'image/jpeg'))
+
+            width = 2400
+            if img.size[0] < width:
+                th = img
+            else:
+                th = img.resize((width, round(width * img.size[1] / img.size[0])))
+
+            file_name = 'image%d.%s' % (i, ext)
+            th.save(os.path.join(temp, file_name))
+            ilustr.set('src', file_name)
+
+            imgfile.close()
 
         for sponsor in book_info.sponsors:
             ins = etree.Element("data-sponsor", name=sponsor)
 
         for sponsor in book_info.sponsors:
             ins = etree.Element("data-sponsor", name=sponsor)
@@ -339,7 +394,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
 
         tex_path = os.path.join(temp, 'doc.tex')
         fout = open(tex_path, 'wb')
 
         tex_path = os.path.join(temp, 'doc.tex')
         fout = open(tex_path, 'wb')
-        process(six.BytesIO(texml), fout, 'utf-8')
+        process(io.BytesIO(texml), fout, 'utf-8')
         fout.close()
         del texml
 
         fout.close()
         del texml
 
@@ -393,7 +448,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
     """
 
     if uri and provider:
     """
 
     if uri and provider:
-        f = provider.by_uri(uri)
+        f = provider.by_slug(uri.slug)
         text = f.read().decode('utf-8')
         f.close()
     elif wldoc is not None:
         text = f.read().decode('utf-8')
         f.close()
     elif wldoc is not None:
@@ -404,7 +459,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
+    # Cyrillic.
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)