Limit image size, fixes #4464.

[librarian.git] / src / librarian / pdf.py
diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py

index a025b9b..de09755 100644 (file)
--- a/src/librarian/pdf.py
+++ b/src/librarian/pdf.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
-#
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
  #
  """PDF creation library.
  
  #
  """PDF creation library.
  
@@ -9,8 +7,7 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import print_function, unicode_literals
-
+import io
  import os
  import os.path
  import shutil
  import os
  import os.path
  import shutil
@@ -19,12 +16,13 @@ import re
  from copy import deepcopy
  from subprocess import call, PIPE
  from itertools import chain
  from copy import deepcopy
  from subprocess import call, PIPE
  from itertools import chain
+import urllib.parse
+import urllib.request
  
  from PIL import Image
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from PIL import Image
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
@@ -190,6 +188,29 @@ def hack_motifs(doc):
                  break
  
  
                  break
  
  
+def add_fundraising(doc, fundraising):
+    # Before each naglowek_czesc and naglowek_akt, and at the end
+    spots = []
+    for naglowek in doc.xpath('//naglowek_czesc|//naglowek_akt'):
+        spot = etree.Element('f_spot')
+        naglowek.addprevious(spot)
+        spots.append(spot)
+    spot = etree.Element('f_spot')
+    doc.getroot()[-1].append(spot)
+    spots.append(spot)
+    e = len(spots)
+    nfunds = len(fundraising)
+    if e > 4 * nfunds:
+        nfunds *= 2
+    for f in range(nfunds):
+        spot_index = int(f / nfunds * e)
+        spots[spot_index].set('active', 'true')
+        elem = etree.fromstring('<f_spot>' + fundraising[f % len(fundraising)] + '</f_spot>')
+        spots[spot_index].text = elem.text
+        for c in elem:
+            spots[spot_index].append(c)
+
+
  def parse_creator(doc):
      """Generates readable versions of creator and translator tags.
  
  def parse_creator(doc):
      """Generates readable versions of creator and translator tags.
  
@@ -244,7 +265,7 @@ def package_available(package, args='', verbose=False):
  
  def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
                cover=None, flags=None, customizations=None, base_url='file://./',
  
  def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
                cover=None, flags=None, customizations=None, base_url='file://./',
-              latex_dir=False):
+              latex_dir=False, fundraising=None):
      """ produces a PDF file with XeLaTeX
  
      wldoc: a WLDocument
      """ produces a PDF file with XeLaTeX
  
      wldoc: a WLDocument
@@ -284,21 +305,31 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          elif package_available('morefloats', 'maxfloats=19'):
              root.set('morefloats', 'new')
  
          elif package_available('morefloats', 'maxfloats=19'):
              root.set('morefloats', 'new')
  
+        if customizations is None:
+            customizations = []
+        else:
+            customizations = list(customizations)
+
+        if book_info.endnotes:
+            customizations.append('endnotes')
+
          # add customizations
          if customizations is not None:
          # add customizations
          if customizations is not None:
-            root.set('customizations', u','.join(customizations))
+            root.set('customizations', ','.join(customizations))
  
          # add editors info
          editors = document.editors()
          if editors:
  
          # add editors info
          editors = document.editors()
          if editors:
-            root.set('editors', u', '.join(sorted(
+            root.set('editors', ', '.join(sorted(
                  editor.readable() for editor in editors)))
          if document.book_info.funders:
                  editor.readable() for editor in editors)))
          if document.book_info.funders:
-            root.set('funders', u', '.join(document.book_info.funders))
+            root.set('funders', ', '.join(document.book_info.funders))
          if document.book_info.thanks:
              root.set('thanks', document.book_info.thanks)
  
          # hack the tree
          if document.book_info.thanks:
              root.set('thanks', document.book_info.thanks)
  
          # hack the tree
+        if fundraising:
+            add_fundraising(document.edoc, fundraising)
          move_motifs_inside(document.edoc)
          hack_motifs(document.edoc)
          parse_creator(document.edoc)
          move_motifs_inside(document.edoc)
          hack_motifs(document.edoc)
          parse_creator(document.edoc)
@@ -306,6 +337,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          fix_hanging(document.edoc)
          fix_tables(document.edoc)
          mark_subauthors(document.edoc)
          fix_hanging(document.edoc)
          fix_tables(document.edoc)
          mark_subauthors(document.edoc)
+        document.fix_pa_akap()
  
          # wl -> TeXML
          style_filename = get_stylesheet("wl2tex")
  
          # wl -> TeXML
          style_filename = get_stylesheet("wl2tex")
@@ -316,11 +348,11 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          temp = mkdtemp('-wl2pdf')
  
          for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
          temp = mkdtemp('-wl2pdf')
  
          for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
-            url = six.moves.urllib.parse.urljoin(
+            url = urllib.parse.urljoin(
                  base_url,
                  ilustr.get('src')
              )
                  base_url,
                  ilustr.get('src')
              )
-            imgfile = six.moves.urllib.request.urlopen(url)
+            imgfile = urllib.request.urlopen(url)
              img = Image.open(imgfile)
  
              th_format, ext, media_type = {
              img = Image.open(imgfile)
  
              th_format, ext, media_type = {
@@ -362,7 +394,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
  
          tex_path = os.path.join(temp, 'doc.tex')
          fout = open(tex_path, 'wb')
  
          tex_path = os.path.join(temp, 'doc.tex')
          fout = open(tex_path, 'wb')
-        process(six.BytesIO(texml), fout, 'utf-8')
+        process(io.BytesIO(texml), fout, 'utf-8')
          fout.close()
          del texml
  
          fout.close()
          del texml
  
@@ -416,7 +448,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
      """
  
      if uri and provider:
      """
  
      if uri and provider:
-        f = provider.by_uri(uri)
+        f = provider.by_slug(uri.slug)
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
@@ -427,7 +459,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
              'Neither a WLDocument, nor provider and URI were provided.'
          )
  
              'Neither a WLDocument, nor provider and URI were provided.'
          )
  
+    # Cyrillic
      text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
      text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
  
      document = WLDocument.from_bytes(text.encode('utf-8'),
                                       parse_dublincore=True, provider=provider)
  
      document = WLDocument.from_bytes(text.encode('utf-8'),
                                       parse_dublincore=True, provider=provider)