Remove direct verse styling from html.

[librarian.git] / src / librarian / pdf.py
diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py

index cad66a4..94d14cf 100644 (file)
--- a/src/librarian/pdf.py
+++ b/src/librarian/pdf.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
-#
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
  #
  """PDF creation library.
  
@@ -9,8 +7,7 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import print_function, unicode_literals
-
+import io
  import os
  import os.path
  import shutil
@@ -19,12 +16,13 @@ import re
  from copy import deepcopy
  from subprocess import call, PIPE
  from itertools import chain
+import urllib.parse
+import urllib.request
  
  from PIL import Image
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
@@ -284,17 +282,25 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          elif package_available('morefloats', 'maxfloats=19'):
              root.set('morefloats', 'new')
  
+        if customizations is None:
+            customizations = []
+        else:
+            customizations = list(customizations)
+
+        if book_info.endnotes:
+            customizations.append('endnotes')
+
          # add customizations
          if customizations is not None:
-            root.set('customizations', u','.join(customizations))
+            root.set('customizations', ','.join(customizations))
  
          # add editors info
          editors = document.editors()
          if editors:
-            root.set('editors', u', '.join(sorted(
+            root.set('editors', ', '.join(sorted(
                  editor.readable() for editor in editors)))
          if document.book_info.funders:
-            root.set('funders', u', '.join(document.book_info.funders))
+            root.set('funders', ', '.join(document.book_info.funders))
          if document.book_info.thanks:
              root.set('thanks', document.book_info.thanks)
  
@@ -306,6 +312,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          fix_hanging(document.edoc)
          fix_tables(document.edoc)
          mark_subauthors(document.edoc)
+        document.fix_pa_akap()
  
          # wl -> TeXML
          style_filename = get_stylesheet("wl2tex")
@@ -316,12 +323,12 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          temp = mkdtemp('-wl2pdf')
  
          for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
-            url = six.moves.urllib.parse.urljoin(
+            url = urllib.parse.urljoin(
                  base_url,
                  ilustr.get('src')
              )
-            with six.moves.urllib.request.urlopen(url) as imgfile:
-                img = Image.open(imgfile)
+            imgfile = urllib.request.urlopen(url)
+            img = Image.open(imgfile)
  
              th_format, ext, media_type = {
                  'GIF': ('GIF', 'gif', 'image/gif'),
@@ -338,6 +345,8 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
              th.save(os.path.join(temp, file_name))
              ilustr.set('src', file_name)
  
+            imgfile.close()
+
          for sponsor in book_info.sponsors:
              ins = etree.Element("data-sponsor", name=sponsor)
              logo = sponsor_logo(sponsor)
@@ -360,7 +369,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
  
          tex_path = os.path.join(temp, 'doc.tex')
          fout = open(tex_path, 'wb')
-        process(six.BytesIO(texml), fout, 'utf-8')
+        process(io.BytesIO(texml), fout, 'utf-8')
          fout.close()
          del texml
  
@@ -414,7 +423,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
      """
  
      if uri and provider:
-        f = provider.by_uri(uri)
+        f = provider.by_slug(uri.slug)
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
@@ -425,7 +434,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
              'Neither a WLDocument, nor provider and URI were provided.'
          )
  
+    # Cyrillic.
      text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
  
      document = WLDocument.from_bytes(text.encode('utf-8'),
                                       parse_dublincore=True, provider=provider)