X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/716a9ab552bffbb7df2cb31ae41ee196902c7653..c7734daf3d6884c358f1644c1f8f7970453fafb1:/src/librarian/pdf.py diff --git a/src/librarian/pdf.py b/src/librarian/pdf.py index a51dbb5..b32395f 100644 --- a/src/librarian/pdf.py +++ b/src/librarian/pdf.py @@ -20,6 +20,7 @@ from copy import deepcopy from subprocess import call, PIPE from itertools import chain +from PIL import Image from Texml.processor import process from lxml import etree from lxml.etree import XMLSyntaxError, XSLTApplyError @@ -242,7 +243,7 @@ def package_available(package, args='', verbose=False): def transform(wldoc, verbose=False, save_tex=None, morefloats=None, - cover=None, flags=None, customizations=None, ilustr_path='', + cover=None, flags=None, customizations=None, base_url='file://./', latex_dir=False): """ produces a PDF file with XeLaTeX @@ -283,6 +284,14 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, elif package_available('morefloats', 'maxfloats=19'): root.set('morefloats', 'new') + if customizations is None: + customizations = [] + else: + customizations = list(customizations) + + if book_info.endnotes: + customizations.append('endnotes') + # add customizations if customizations is not None: root.set('customizations', u','.join(customizations)) @@ -305,6 +314,7 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, fix_hanging(document.edoc) fix_tables(document.edoc) mark_subauthors(document.edoc) + document.fix_pa_akap() # wl -> TeXML style_filename = get_stylesheet("wl2tex") @@ -314,8 +324,30 @@ def transform(wldoc, verbose=False, save_tex=None, morefloats=None, # TeXML -> LaTeX temp = mkdtemp('-wl2pdf') - for ilustr in document.edoc.findall("//ilustr"): - shutil.copy(os.path.join(ilustr_path, ilustr.get("src")), temp) + for i, ilustr in enumerate(document.edoc.findall('//ilustr')): + url = six.moves.urllib.parse.urljoin( + base_url, + ilustr.get('src') + ) + imgfile = six.moves.urllib.request.urlopen(url) + img = Image.open(imgfile) + + th_format, ext, media_type = { + 'GIF': ('GIF', 'gif', 'image/gif'), + 'PNG': ('PNG', 'png', 'image/png'), + }.get(img.format, ('JPEG', 'jpg', 'image/jpeg')) + + width = 2400 + if img.size[0] < width: + th = img + else: + th = img.resize((width, round(width * img.size[1] / img.size[0]))) + + file_name = 'image%d.%s' % (i, ext) + th.save(os.path.join(temp, file_name)) + ilustr.set('src', file_name) + + imgfile.close() for sponsor in book_info.sponsors: ins = etree.Element("data-sponsor", name=sponsor) @@ -393,7 +425,7 @@ def load_including_children(wldoc=None, provider=None, uri=None): """ if uri and provider: - f = provider.by_uri(uri) + f = provider.by_slug(uri.slug) text = f.read().decode('utf-8') f.close() elif wldoc is not None: @@ -404,7 +436,10 @@ def load_including_children(wldoc=None, provider=None, uri=None): 'Neither a WLDocument, nor provider and URI were provided.' ) + # Cyrrilic text = re.sub(r"([\u0400-\u04ff]+)", r"\1", text) + # Geometric shapes. + text = re.sub(r"([\u25a0-\u25ff]+)", r"\1", text) document = WLDocument.from_bytes(text.encode('utf-8'), parse_dublincore=True, provider=provider)