Limit image size, fixes #4464.
[librarian.git] / src / librarian / pdf.py
index a025b9b..de09755 100644 (file)
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
-#
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
 #
 """PDF creation library.
 
 #
 """PDF creation library.
 
@@ -9,8 +7,7 @@ Creates one big XML from the book and its children, converts it to LaTeX
 with TeXML, then runs it by XeLaTeX.
 
 """
 with TeXML, then runs it by XeLaTeX.
 
 """
-from __future__ import print_function, unicode_literals
-
+import io
 import os
 import os.path
 import shutil
 import os
 import os.path
 import shutil
@@ -19,12 +16,13 @@ import re
 from copy import deepcopy
 from subprocess import call, PIPE
 from itertools import chain
 from copy import deepcopy
 from subprocess import call, PIPE
 from itertools import chain
+import urllib.parse
+import urllib.request
 
 from PIL import Image
 from Texml.processor import process
 from lxml import etree
 from lxml.etree import XMLSyntaxError, XSLTApplyError
 
 from PIL import Image
 from Texml.processor import process
 from lxml import etree
 from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
 
 from librarian.dcparser import Person
 from librarian.parser import WLDocument
 
 from librarian.dcparser import Person
 from librarian.parser import WLDocument
@@ -190,6 +188,29 @@ def hack_motifs(doc):
                 break
 
 
                 break
 
 
+def add_fundraising(doc, fundraising):
+    # Before each naglowek_rozdzial and naglowek_scena and in the end
+    spots = []
+    for naglowek in doc.xpath('//naglowek_czesc|//naglowek_akt'):
+        spot = etree.Element('f_spot')
+        naglowek.addprevious(spot)
+        spots.append(spot)
+    spot = etree.Element('f_spot')
+    doc.getroot()[-1].append(spot)
+    spots.append(spot)
+    e = len(spots)
+    nfunds = len(fundraising)
+    if e > 4 * nfunds:
+        nfunds *= 2
+    for f in range(nfunds):
+        spot_index = int(f / nfunds * e)
+        spots[spot_index].set('active', 'true')
+        elem = etree.fromstring('<f_spot>' + fundraising[f % len(fundraising)] + '</f_spot>')
+        spots[spot_index].text = elem.text
+        for c in elem:
+            spots[spot_index].append(c)
+
+
 def parse_creator(doc):
     """Generates readable versions of creator and translator tags.
 
 def parse_creator(doc):
     """Generates readable versions of creator and translator tags.
 
@@ -244,7 +265,7 @@ def package_available(package, args='', verbose=False):
 
 def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
               cover=None, flags=None, customizations=None, base_url='file://./',
 
 def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
               cover=None, flags=None, customizations=None, base_url='file://./',
-              latex_dir=False):
+              latex_dir=False, fundraising=None):
     """ produces a PDF file with XeLaTeX
 
     wldoc: a WLDocument
     """ produces a PDF file with XeLaTeX
 
     wldoc: a WLDocument
@@ -284,21 +305,31 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         elif package_available('morefloats', 'maxfloats=19'):
             root.set('morefloats', 'new')
 
         elif package_available('morefloats', 'maxfloats=19'):
             root.set('morefloats', 'new')
 
+        if customizations is None:
+            customizations = []
+        else:
+            customizations = list(customizations)
+
+        if book_info.endnotes:
+            customizations.append('endnotes')
+
         # add customizations
         if customizations is not None:
         # add customizations
         if customizations is not None:
-            root.set('customizations', u','.join(customizations))
+            root.set('customizations', ','.join(customizations))
 
         # add editors info
         editors = document.editors()
         if editors:
 
         # add editors info
         editors = document.editors()
         if editors:
-            root.set('editors', u', '.join(sorted(
+            root.set('editors', ', '.join(sorted(
                 editor.readable() for editor in editors)))
         if document.book_info.funders:
                 editor.readable() for editor in editors)))
         if document.book_info.funders:
-            root.set('funders', u', '.join(document.book_info.funders))
+            root.set('funders', ', '.join(document.book_info.funders))
         if document.book_info.thanks:
             root.set('thanks', document.book_info.thanks)
 
         # hack the tree
         if document.book_info.thanks:
             root.set('thanks', document.book_info.thanks)
 
         # hack the tree
+        if fundraising:
+            add_fundraising(document.edoc, fundraising)
         move_motifs_inside(document.edoc)
         hack_motifs(document.edoc)
         parse_creator(document.edoc)
         move_motifs_inside(document.edoc)
         hack_motifs(document.edoc)
         parse_creator(document.edoc)
@@ -306,6 +337,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         fix_hanging(document.edoc)
         fix_tables(document.edoc)
         mark_subauthors(document.edoc)
         fix_hanging(document.edoc)
         fix_tables(document.edoc)
         mark_subauthors(document.edoc)
+        document.fix_pa_akap()
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
 
         # wl -> TeXML
         style_filename = get_stylesheet("wl2tex")
@@ -316,11 +348,11 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
         temp = mkdtemp('-wl2pdf')
 
         for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
         temp = mkdtemp('-wl2pdf')
 
         for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
-            url = six.moves.urllib.parse.urljoin(
+            url = urllib.parse.urljoin(
                 base_url,
                 ilustr.get('src')
             )
                 base_url,
                 ilustr.get('src')
             )
-            imgfile = six.moves.urllib.request.urlopen(url)
+            imgfile = urllib.request.urlopen(url)
             img = Image.open(imgfile)
 
             th_format, ext, media_type = {
             img = Image.open(imgfile)
 
             th_format, ext, media_type = {
@@ -362,7 +394,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
 
         tex_path = os.path.join(temp, 'doc.tex')
         fout = open(tex_path, 'wb')
 
         tex_path = os.path.join(temp, 'doc.tex')
         fout = open(tex_path, 'wb')
-        process(six.BytesIO(texml), fout, 'utf-8')
+        process(io.BytesIO(texml), fout, 'utf-8')
         fout.close()
         del texml
 
         fout.close()
         del texml
 
@@ -416,7 +448,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
     """
 
     if uri and provider:
     """
 
     if uri and provider:
-        f = provider.by_uri(uri)
+        f = provider.by_slug(uri.slug)
         text = f.read().decode('utf-8')
         f.close()
     elif wldoc is not None:
         text = f.read().decode('utf-8')
         f.close()
     elif wldoc is not None:
@@ -427,7 +459,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
             'Neither a WLDocument, nor provider and URI were provided.'
         )
 
+    # Cyrrilic
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
     text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)
 
     document = WLDocument.from_bytes(text.encode('utf-8'),
                                      parse_dublincore=True, provider=provider)