X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/175c5cf4f727162fa5bddd2460d37595251bbe8e..23d025c8875cca1404f274aca7170c9db5e980e7:/src/librarian/pdf.py
diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py
index 31dfe1e..de09755 100644
--- a/src/librarian/pdf.py
+++ b/src/librarian/pdf.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
-#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
-# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+# Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
#
"""PDF creation library.
@@ -9,8 +7,7 @@ Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it by XeLaTeX.
"""
-from __future__ import print_function, unicode_literals
-
+import io
import os
import os.path
import shutil
@@ -19,12 +16,13 @@ import re
from copy import deepcopy
from subprocess import call, PIPE
from itertools import chain
+import urllib.parse
+import urllib.request
from PIL import Image
from Texml.processor import process
from lxml import etree
from lxml.etree import XMLSyntaxError, XSLTApplyError
-import six
from librarian.dcparser import Person
from librarian.parser import WLDocument
@@ -190,6 +188,29 @@ def hack_motifs(doc):
break
+def add_fundraising(doc, fundraising):
+    """Scatter fundraising messages over ``f_spot`` placeholders in doc.
+
+    An empty ``f_spot`` element is inserted before every ``naglowek_czesc``
+    and ``naglowek_akt`` element, and one more is appended to the last
+    child of the root element.  A subset of those spots, spread evenly
+    starting from the first one, is then marked ``active="true"`` and
+    filled with the text and child elements of the parsed messages.
+
+    doc: a parsed tree providing ``xpath()`` and ``getroot()``;
+        it is modified in place.
+    fundraising: a sequence of strings, each parsed with
+        ``etree.fromstring``.
+    """
+    spots = []
+    for header in doc.xpath('//naglowek_czesc|//naglowek_akt'):
+        spot = etree.Element('f_spot')
+        header.addprevious(spot)
+        spots.append(spot)
+    # The closing spot is always added, so ``spots`` is never empty.
+    spot = etree.Element('f_spot')
+    doc.getroot()[-1].append(spot)
+    spots.append(spot)
+
+    nspots = len(spots)
+    nfunds = len(fundraising)
+    # With more than four spots per message, use every message twice.
+    if nspots > 4 * nfunds:
+        nfunds *= 2
+    for f in range(nfunds):
+        # Exact integer arithmetic: the float form
+        # int(f / nfunds * nspots) can truncate to the previous spot
+        # (e.g. 1 / 49 * 49 == 0.9999999999999999).
+        # NOTE(review): with fewer spots than messages several messages
+        # land on the same spot; the later text overwrites the earlier
+        # one while children accumulate -- confirm this is intended.
+        spot = spots[f * nspots // nfunds]
+        spot.set('active', 'true')
+        message = fundraising[f % len(fundraising)]
+        # NOTE(review): concatenating '' is a no-op as written, so each
+        # message must already be a single-rooted XML document; this
+        # looks like a wrapper element may have been intended -- confirm.
+        elem = etree.fromstring('' + message + '')
+        spot.text = elem.text
+        # Appending moves a child out of ``elem``; iterate a snapshot.
+        for child in list(elem):
+            spot.append(child)
+
+
def parse_creator(doc):
"""Generates readable versions of creator and translator tags.
@@ -244,7 +265,7 @@ def package_available(package, args='', verbose=False):
def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
cover=None, flags=None, customizations=None, base_url='file://./',
- latex_dir=False):
+ latex_dir=False, fundraising=None):
""" produces a PDF file with XeLaTeX
wldoc: a WLDocument
@@ -294,19 +315,21 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
# add customizations
if customizations is not None:
- root.set('customizations', u','.join(customizations))
+ root.set('customizations', ','.join(customizations))
# add editors info
editors = document.editors()
if editors:
- root.set('editors', u', '.join(sorted(
+ root.set('editors', ', '.join(sorted(
editor.readable() for editor in editors)))
if document.book_info.funders:
- root.set('funders', u', '.join(document.book_info.funders))
+ root.set('funders', ', '.join(document.book_info.funders))
if document.book_info.thanks:
root.set('thanks', document.book_info.thanks)
# hack the tree
+ if fundraising:
+ add_fundraising(document.edoc, fundraising)
move_motifs_inside(document.edoc)
hack_motifs(document.edoc)
parse_creator(document.edoc)
@@ -325,11 +348,11 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
temp = mkdtemp('-wl2pdf')
for i, ilustr in enumerate(document.edoc.findall('//ilustr')):
- url = six.moves.urllib.parse.urljoin(
+ url = urllib.parse.urljoin(
base_url,
ilustr.get('src')
)
- imgfile = six.moves.urllib.request.urlopen(url)
+ imgfile = urllib.request.urlopen(url)
img = Image.open(imgfile)
th_format, ext, media_type = {
@@ -371,7 +394,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None,
tex_path = os.path.join(temp, 'doc.tex')
fout = open(tex_path, 'wb')
- process(six.BytesIO(texml), fout, 'utf-8')
+ process(io.BytesIO(texml), fout, 'utf-8')
fout.close()
del texml
@@ -425,7 +448,7 @@ def load_including_children(wldoc=None, provider=None, uri=None):
"""
if uri and provider:
- f = provider.by_uri(uri)
+ f = provider.by_slug(uri.slug)
text = f.read().decode('utf-8')
f.close()
elif wldoc is not None:
@@ -436,7 +459,10 @@ def load_including_children(wldoc=None, provider=None, uri=None):
'Neither a WLDocument, nor provider and URI were provided.'
)
+ # Cyrillic
text = re.sub(r"([\u0400-\u04ff]+)", r"\1", text)
+ # Geometric shapes.
+ text = re.sub(r"([\u25a0-\u25ff]+)", r"\1", text)
document = WLDocument.from_bytes(text.encode('utf-8'),
parse_dublincore=True, provider=provider)