def add_fundraising(doc, fundraising):
    """Insert fundraising placeholders into the document and fill some of them.

    An empty ``f_spot`` element is inserted before every ``naglowek_czesc``
    and ``naglowek_akt`` element, and one more is appended to the last child
    of the root element.  The fundraising texts are then spread evenly over
    those spots; every spot that receives a text gets ``active="true"``.

    When there are more than four spots per text, each text is placed twice
    (the list of texts is cycled through a second time).

    If there are more texts than spots, several texts land on the same spot:
    the spot's leading text is overwritten by the later one, while child
    elements accumulate.

    :param doc: parsed document tree providing ``xpath()`` and ``getroot()``
        (presumably an lxml ``ElementTree`` -- confirm against the caller).
    :param fundraising: sequence of XML fragments, each of which must be
        parseable by ``etree.fromstring``.
    :returns: ``None``; ``doc`` is modified in place.
    """
    spots = []
    for header in doc.xpath('//naglowek_czesc|//naglowek_akt'):
        spot = etree.Element('f_spot')
        header.addprevious(spot)
        spots.append(spot)

    # One extra spot at the very end, inside the last child of the root.
    spot = etree.Element('f_spot')
    doc.getroot()[-1].append(spot)
    spots.append(spot)

    spot_count = len(spots)
    text_count = len(fundraising)
    slots = text_count
    if spot_count > 4 * text_count:
        slots *= 2

    for slot in range(slots):
        # Exact integer floor division.  The float form
        # int(slot / slots * spot_count) can come out one too low when the
        # true quotient is a whole number (1 / 49 * 49 == 0.9999999999999999).
        target = spots[slot * spot_count // slots]
        target.set('active', 'true')
        # NOTE(review): the two empty-string operands are kept as found; they
        # look like wrapper markup that was lost somewhere -- confirm against
        # the upstream file before relying on this.
        elem = etree.fromstring('' + fundraising[slot % text_count] + '')
        target.text = elem.text
        # Appending re-parents each child, i.e. removes it from ``elem``;
        # iterate over a snapshot so the loop never walks a container that
        # is being emptied underneath it.
        for child in list(elem):
            target.append(child)
"""Generates readable versions of creator and translator tags. @@ -244,7 +265,7 @@ def package_available(package, args='', verbose=False): def transform(wldoc, verbose=False, save_tex=None, morefloats=None, cover=None, flags=None, customizations=None, base_url='file://./', - latex_dir=False): + latex_dir=False, fundraising=None): """ produces a PDF file with XeLaTeX wldoc: a WLDocument @@ -284,21 +305,31 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, elif package_available('morefloats', 'maxfloats=19'): root.set('morefloats', 'new') + if customizations is None: + customizations = [] + else: + customizations = list(customizations) + + if book_info.endnotes: + customizations.append('endnotes') + # add customizations if customizations is not None: - root.set('customizations', u','.join(customizations)) + root.set('customizations', ','.join(customizations)) # add editors info editors = document.editors() if editors: - root.set('editors', u', '.join(sorted( + root.set('editors', ', '.join(sorted( editor.readable() for editor in editors))) if document.book_info.funders: - root.set('funders', u', '.join(document.book_info.funders)) + root.set('funders', ', '.join(document.book_info.funders)) if document.book_info.thanks: root.set('thanks', document.book_info.thanks) # hack the tree + if fundraising: + add_fundraising(document.edoc, fundraising) move_motifs_inside(document.edoc) hack_motifs(document.edoc) parse_creator(document.edoc) @@ -306,6 +337,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, fix_hanging(document.edoc) fix_tables(document.edoc) mark_subauthors(document.edoc) + document.fix_pa_akap() # wl -> TeXML style_filename = get_stylesheet("wl2tex") @@ -316,11 +348,11 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, temp = mkdtemp('-wl2pdf') for i, ilustr in enumerate(document.edoc.findall('//ilustr')): - url = six.moves.urllib.parse.urljoin( + url = urllib.parse.urljoin( base_url, 
ilustr.get('src') ) - imgfile = six.moves.urllib.request.urlopen(url) + imgfile = urllib.request.urlopen(url) img = Image.open(imgfile) th_format, ext, media_type = { @@ -362,7 +394,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, tex_path = os.path.join(temp, 'doc.tex') fout = open(tex_path, 'wb') - process(six.BytesIO(texml), fout, 'utf-8') + process(io.BytesIO(texml), fout, 'utf-8') fout.close() del texml @@ -416,7 +448,7 @@ def load_including_children(wldoc=None, provider=None, uri=None): """ if uri and provider: - f = provider.by_uri(uri) + f = provider.by_slug(uri.slug) text = f.read().decode('utf-8') f.close() elif wldoc is not None: @@ -427,7 +459,10 @@ def load_including_children(wldoc=None, provider=None, uri=None): 'Neither a WLDocument, nor provider and URI were provided.' ) + # Cyrillic text = re.sub(r"([\u0400-\u04ff]+)", r"\1", text) + # Geometric shapes. + text = re.sub(r"([\u25a0-\u25ff]+)", r"\1", text) document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider)