Remove direct verse styling from html.

[librarian.git] / src / librarian / pdf.py
diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py

index 31dfe1e..94d14cf 100644 (file)
--- a/src/librarian/pdf.py
+++ b/src/librarian/pdf.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
-#
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
  # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
  #
  """PDF creation library.
  
  #
  """PDF creation library.
  
@@ -9,8 +7,7 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import print_function, unicode_literals
-
+import io
  import os
  import os.path
  import shutil
  import os
  import os.path
  import shutil
@@ -19,12 +16,13 @@ import re
  from copy import deepcopy
  from subprocess import call, PIPE
  from itertools import chain
  from copy import deepcopy
  from subprocess import call, PIPE
  from itertools import chain
+import urllib.parse
+import urllib.request
  
  from PIL import Image
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from PIL import Image
  from Texml.processor import process
  from lxml import etree
  from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
  
  from librarian.dcparser import Person
  from librarian.parser import WLDocument
@@ -294,15 +292,15 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
  
          # add customizations
          if customizations is not None:
  
          # add customizations
          if customizations is not None:
-            root.set('customizations', u','.join(customizations))
+            root.set('customizations', ','.join(customizations))
  
          # add editors info
          editors = document.editors()
          if editors:
  
          # add editors info
          editors = document.editors()
          if editors:
-            root.set('editors', u', '.join(sorted(
+            root.set('editors', ', '.join(sorted(
                  editor.readable() for editor in editors)))
          if document.book_info.funders:
                  editor.readable() for editor in editors)))
          if document.book_info.funders:
-            root.set('funders', u', '.join(document.book_info.funders))
+            root.set('funders', ', '.join(document.book_info.funders))
          if document.book_info.thanks:
              root.set('thanks', document.book_info.thanks)
  
          if document.book_info.thanks:
              root.set('thanks', document.book_info.thanks)
  
@@ -325,11 +323,11 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
          temp = mkdtemp('-wl2pdf')
  
          for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
          temp = mkdtemp('-wl2pdf')
  
          for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
-            url = six.moves.urllib.parse.urljoin(
+            url = urllib.parse.urljoin(
                  base_url,
                  ilustr.get('src')
              )
                  base_url,
                  ilustr.get('src')
              )
-            imgfile = six.moves.urllib.request.urlopen(url)
+            imgfile = urllib.request.urlopen(url)
              img = Image.open(imgfile)
  
              th_format, ext, media_type = {
              img = Image.open(imgfile)
  
              th_format, ext, media_type = {
@@ -371,7 +369,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
  
          tex_path = os.path.join(temp, 'doc.tex')
          fout = open(tex_path, 'wb')
  
          tex_path = os.path.join(temp, 'doc.tex')
          fout = open(tex_path, 'wb')
-        process(six.BytesIO(texml), fout, 'utf-8')
+        process(io.BytesIO(texml), fout, 'utf-8')
          fout.close()
          del texml
  
          fout.close()
          del texml
  
@@ -425,7 +423,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
      """
  
      if uri and provider:
      """
  
      if uri and provider:
-        f = provider.by_uri(uri)
+        f = provider.by_slug(uri.slug)
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
          text = f.read().decode('utf-8')
          f.close()
      elif wldoc is not None:
@@ -436,7 +434,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
              'Neither a WLDocument, nor provider and URI were provided.'
          )
  
              'Neither a WLDocument, nor provider and URI were provided.'
          )
  
+    # Cyrillic.
      text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
      text = re.sub(r"([\u0400-\u04ff]+)", r"<alien>\1</alien>", text)
+    # Geometric shapes.
+    text = re.sub(r"([\u25a0-\u25ff]+)", r"<alien>\1</alien>", text)
  
      document = WLDocument.from_bytes(text.encode('utf-8'),
                                       parse_dublincore=True, provider=provider)
  
      document = WLDocument.from_bytes(text.encode('utf-8'),
                                       parse_dublincore=True, provider=provider)