multiple audience in pdf/epub
[librarian.git] / librarian / formats / pdf / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import os
7 import shutil
8 from subprocess import call, PIPE
9 from tempfile import NamedTemporaryFile, mkdtemp
10 from lxml import etree
11 from urllib import urlretrieve
12 from StringIO import StringIO
13 from Texml.processor import process
14 from librarian import DCNS, XMLNamespace, BuildError
15 from librarian.formats import Format
16 from librarian.output import OutputFile
17 from librarian.renderers import Register, TreeRenderer
18 from librarian.utils import Context, get_resource
19 from librarian import core
20 from PIL import Image
21 from ..html import Silent
22
23
# Namespace helper for TeXML elements; calling it with a local tag name
# (e.g. TexmlNS('cmd')) yields the name passed to lxml when building elements.
TexmlNS = XMLNamespace('http://getfo.sourceforge.net/texml/ns1')
25
26
def texml_cmd(name, *parms, **kwargs):
    """Build a TeXML ``cmd`` element for the TeX command ``name``.

    Each value in the optional ``opts`` keyword argument becomes an ``opt``
    child, followed by one ``parm`` child per positional argument; the value
    is stored as the child's text.

    Returns the newly created ``cmd`` element.
    """
    command = etree.Element(TexmlNS('cmd'), name=name)
    # Options always precede parameters in the resulting element.
    children = [('opt', value) for value in kwargs.get('opts', [])]
    children += [('parm', value) for value in parms]
    for tag, value in children:
        etree.SubElement(command, TexmlNS(tag)).text = value
    return command
34
35
class PdfFormat(Format):
    """PDF output format: builds a TeXML tree and compiles it with XeLaTeX.

    The methods below read the document through ``self.doc`` (its metadata
    via ``self.doc.meta`` and its content tree via ``self.doc.edoc``).
    """
    format_name = 'PDF'
    format_ext = 'pdf'
    # Number of XeLaTeX runs performed by build().
    tex_passes = 1
    # Style file copied into the working directory as ``style.sty``.
    style = get_resource('formats/pdf/res/default.sty')

    # Extra packages; each is copied as ``librarianlocalpackage<N>.sty``
    # and loaded in list order.
    local_packages = [
        get_resource('formats/pdf/res/coverimage.sty'),
        get_resource('formats/pdf/res/insertimage.sty'),
    ]

    # Registry used by render() to pick a renderer for a document element.
    renderers = Register()

    def retrieve_file(self, url, save_as):
        """Hook for fetching ``url`` into ``save_as`` by custom means.

        Returns a true value when the file has been stored; this default
        implementation always returns False, so add_file() downloads it.
        """
        # TODO: local scheme
        return False

    def add_file(self, ctx, filename, url=None, path=None, image=False):
        """Place a file in the working directory under the name ``filename``.

        ctx -- build context; ``ctx.workdir`` is the target directory and
               ``ctx.files_path`` replaces the ``file://`` prefix of URLs.
        filename -- name of the file to create inside ``ctx.workdir``.
        url -- remote location, used when ``path`` is None.
        path -- local source file; takes precedence over ``url``.
        image -- when true, the file is passed through ImageMagick
                 ``convert`` on its way to the working directory.

        Raises BuildError when neither ``url`` nor ``path`` is given, or when
        the final URL contains no dot (no recognisable extension).
        """
        if not url and not path:
            raise BuildError('No URL or path for image')
        save_as = os.path.join(ctx.workdir, filename)

        if path is not None:
            if image:
                self._convert_image(path, save_as, path.rsplit('.', 1)[-1])
            else:
                shutil.copy(path, save_as)
            return

        if self.retrieve_file(url, save_as):
            return

        if url.startswith('file://'):
            url = ctx.files_path + url[7:]

        # Site-relative URLs are resolved against the platform host.
        if url.startswith('/'):
            url = 'http://milpeer.eu' + url

        if '.' not in url:
            raise BuildError('Linked file without extension: %s' % url)

        if not image:
            urlretrieve(url, save_as)
            return

        ext = url.rsplit('.', 1)[-1]
        downloaded = save_as + '_.' + ext
        urlretrieve(url, downloaded)
        self._convert_image(downloaded, save_as, ext)

    def _convert_image(self, source, target, ext):
        """Run ``source`` through ImageMagick ``convert`` and store it as ``target``.

        GIFs are converted directly.  Other images get their density reset;
        if that conversion fails, the unconverted ``source`` is used instead
        so that local and downloaded images are treated the same way.
        """
        if ext == 'gif':
            call(['convert', source, target])
            return
        # JPEGs with bad density will break LaTeX with 'Dimension too large'.
        converted = target + '_2.' + ext
        failed = call(['convert', '-units', 'PixelsPerInch', source,
                       '-density', '300', converted])
        if failed:
            shutil.copy(source, target)
        else:
            shutil.move(converted, target)

    def get_file(self, ctx, filename):
        """Return the path of ``filename`` inside the working directory."""
        return os.path.join(ctx.workdir, filename)

    def get_texml(self, build_ctx):
        """Build and return the TeXML tree for the whole document.

        Copies the document class, style and local packages into
        ``build_ctx.workdir``, then emits the title page (cover image, author,
        title, optional cover logo), the rendered document body and a closing
        "Information about the resource" section.
        """
        t = etree.Element(TexmlNS('TeXML'))

        self.add_file(build_ctx, 'wl.cls', path=get_resource('formats/pdf/res/wl.cls'))
        t.append(texml_cmd("documentclass", "wl"))

        # global packages
        self.add_file(build_ctx, 'style.sty', path=self.style)
        t.append(texml_cmd("usepackage", "style"))
        t.append(texml_cmd("usepackage", "hyphenat"))

        # local packages
        for i, package in enumerate(self.local_packages):
            self.add_file(build_ctx, "librarianlocalpackage%s.sty" % i, path=package)
            t.append(texml_cmd("usepackage", "librarianlocalpackage%s" % i))

        author = ", ".join(self.doc.meta.get(DCNS('creator')) or '')
        title = self.doc.meta.title()
        t.append(texml_cmd("author", author))
        t.append(texml_cmd("title", title))

        doc = etree.SubElement(t, TexmlNS('env'), name="document")
        doc.append(texml_cmd("thispagestyle", "empty"))

        # title page
        cover_url = self.doc.meta.get_one(DCNS('relation.coverimage.url'))
        if cover_url:
            self.add_file(build_ctx, 'cover.png', cover_url, image=True)

            img = Image.open(self.get_file(build_ctx, 'cover.png'))
            size = img.size

            # Portrait covers are cut down to a square taken from the top.
            if size[1] > size[0]:
                img = img.crop((0, 0, size[0], size[0]))
                img.save(self.get_file(build_ctx, 'cover.png'), format=img.format, quality=90)
            size = img.size

            # TODO: hardcoded paper size here
            height = 210.0 * size[1] / size[0]
            doc.append(texml_cmd("makecover", "%fmm" % height))
        else:
            doc.append(texml_cmd("vfill*"))

        # Sizes!
        grp = etree.SubElement(doc, 'group')
        grp.append(texml_cmd("raggedright"))
        grp.append(texml_cmd("vfill"))
        if author:
            p = texml_cmd("par", "")
            grp.append(p)
            p[0].append(texml_cmd("Large"))
            p[0].append(texml_cmd("noindent"))
            p[0].append(texml_cmd("nohyphens", author))
            p[0].append(texml_cmd("vspace", "1em"))
        if title:
            p = texml_cmd("par", "")
            grp.append(p)
            p[0].append(texml_cmd("Huge"))
            p[0].append(texml_cmd("noindent"))
            p[0].append(texml_cmd("nohyphens", title))
        doc.append(texml_cmd("vfill"))
        doc.append(texml_cmd("vfill"))

        # IOFile probably would be better
        cover_logo_url = getattr(build_ctx, 'cover_logo', None)
        # TODO: convert
        if cover_logo_url:
            self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True)
            size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size
            p = texml_cmd("par", "")
            doc.append(p)
            p[0].append(texml_cmd("noindent"))
            # The logo is 1cm high; the width keeps the image's aspect ratio.
            p[0].append(texml_cmd("insertimage", 'coverlogo.png', "%fcm" % (1.0 * size[0] / size[1]), "1cm"))

        # organisation logo!
        doc.append(texml_cmd("clearpage"))

        ctx = Context(build_ctx, format=self, img=1)
        doc.extend(self.render(self.doc.edoc.getroot(), ctx))

        # Editorial page at the end.
        doc.append(texml_cmd("clearpage"))

        doc.append(texml_cmd("section*", "Information about the resource"))
        doc.append(texml_cmd("vspace", "1em"))

        for label, field, multiple in (
                ('Publisher: ', DCNS('publisher'), False),
                ('Rights: ', DCNS('rights'), False),
                ('Intended audience: ', DCNS('audience'), True),
                ('', DCNS('description'), False)):
            if multiple:
                # Same guard as used for the creator list above, so a missing
                # field yields an empty string instead of a failing join().
                value = ', '.join(self.doc.meta.get(field) or [])
            else:
                value = self.doc.meta.get_one(field)
            if value:
                e = texml_cmd("par", "")
                e[0].append(texml_cmd("noindent"))
                e[0][0].tail = "%s%s" % (label, value)
                doc.append(e)
                doc.append(texml_cmd("vspace", "1em"))

        e = texml_cmd("par", "")
        e[0].append(texml_cmd("noindent"))
        e[0][0].tail = "Resource prepared using "
        e[0].append(texml_cmd("href", "http://milpeer.eu", "MIL/PEER"))
        e[0][-1].tail = " editing platform. "
        doc.append(e)

        source_url = getattr(build_ctx, 'source_url', None)
        if source_url:
            e = texml_cmd("par", "")
            doc.append(e)
            e[0].append(texml_cmd("noindent"))
            e[0][0].tail = "Source available at "
            e[0].append(texml_cmd("href", source_url, source_url))

        return t

    def get_tex_dir(self, ctx):
        """Create a working directory containing ``doc.tex`` and its resources.

        A fresh temporary directory is stored in ``ctx.workdir``, the TeXML
        tree is generated and processed into ``doc.tex`` there.

        Returns the path of the working directory.
        """
        ctx.workdir = mkdtemp('-wl2pdf')
        texml = self.get_texml(ctx)
        tex_path = os.path.join(ctx.workdir, 'doc.tex')
        with open(tex_path, 'w') as fout:
            process(StringIO(etree.tostring(texml)), fout, 'utf-8')
        return ctx.workdir

    def build(self, ctx=None, verbose=False):
        """Compile the document to PDF and return it as an OutputFile.

        ctx -- build context; get_tex_dir() assigns ``ctx.workdir`` on it, so
               the ``None`` default fails there and a context must be given.
        verbose -- when true, XeLaTeX output is shown; otherwise XeLaTeX runs
                   in batch mode with its output discarded.

        Raises RuntimeError when the last XeLaTeX pass exits with a non-zero
        status.  In that case the working directory is left in place so the
        ``.tex`` file named in the error message can be inspected.
        """
        temp = self.get_tex_dir(ctx)
        tex_path = os.path.join(temp, 'doc.tex')

        # XeLaTeX runs with the working directory as its cwd (passed to the
        # child process), so this process never has to chdir and cannot be
        # left in the wrong directory when a pass fails.
        status = 0
        if verbose:
            for _ in range(self.tex_passes):
                status = call(['xelatex', tex_path], cwd=temp)
        else:
            # Discard output via os.devnull rather than PIPE: nothing reads
            # the pipes, so a chatty XeLaTeX run could block forever on a
            # full pipe buffer.
            with open(os.devnull, 'w') as devnull:
                for _ in range(self.tex_passes):
                    status = call(['xelatex', '-interaction=batchmode', tex_path],
                                  cwd=temp, stdout=devnull, stderr=devnull)
        if status:
            raise RuntimeError("Error parsing .tex file: %s" % tex_path)

        # Only the unique name is needed; close the handle before the PDF
        # is moved over it.
        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
        output_file.close()
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    def render(self, element, ctx):
        """Render ``element`` with the renderer registered for it."""
        return self.renderers.get_for(element).render(element, ctx)
269
270
class CmdRenderer(TreeRenderer):
    """Renderer that wraps rendered content in a single TeX command.

    The command name is ``self.tag_name``; content goes into the command's
    last parameter.
    """

    def parms(self):
        """Return the list of leading parameter texts for the command.

        Subclasses override this to reserve extra parameters in front of
        the content parameter.
        """
        return []

    def container(self):
        """Return ``(root, inner)``: the wrapper element and the content slot.

        ``inner`` is the trailing empty ``parm`` appended after parms().
        """
        command = texml_cmd(self.tag_name, *(self.parms() + [""]))
        wrapper = etree.Element(self.root_name)
        wrapper.append(command)
        return wrapper, command[-1]
280
281
class EnvRenderer(TreeRenderer):
    """Renderer that wraps rendered content in an ``env`` element."""

    def container(self):
        """Return ``(root, inner)`` where ``inner`` is the ``env`` element
        named after ``self.tag_name``."""
        wrapper = etree.Element(self.root_name)
        environment = etree.Element('env', name=self.tag_name)
        wrapper.append(environment)
        return wrapper, environment
287
288
class GroupRenderer(CmdRenderer):
    """Renderer that places content in a ``group`` element, optionally
    preceded inside the group by the command named ``self.tag_name``."""

    def container(self):
        """Return ``(root, inner)`` where ``inner`` is the ``group`` element."""
        wrapper = etree.Element(self.root_name)
        group = etree.SubElement(wrapper, 'group')
        if not self.tag_name:
            return wrapper, group
        group.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
        return wrapper, group
296
297
class SectionRenderer(CmdRenderer):
    """Command renderer for sections: emits a ``pagebreak`` with option ``1``
    ahead of the section command."""

    def subcontext(self, element, ctx):
        """Return a child context whose ``toc_level`` is two above the
        parent's (the parent level defaults to 1 when unset)."""
        # here?
        parent_level = getattr(ctx, 'toc_level', 1)
        return Context(ctx, toc_level=parent_level + 2)

    def container(self):
        """Return ``(root, inner)``; ``inner`` is the first child of the
        section command, which follows the page break inside ``root``."""
        page_break = texml_cmd('pagebreak', opts=['1'])
        section = texml_cmd(self.tag_name, *(self.parms() + [""]))
        wrapper = etree.Element(self.root_name)
        wrapper.extend([page_break, section])
        return wrapper, section[0]
309
# Block-level elements: sections, headers and plain divs.
# NOTE(review): the second argument looks like an element class/type label
# (None for the default renderer) -- confirm against Register.register.
PdfFormat.renderers.register(core.Section, None, SectionRenderer('par'))

# TODO: heading levels
PdfFormat.renderers.register(core.Header, None, CmdRenderer('section*'))

PdfFormat.renderers.register(core.Div, None, CmdRenderer('par'))
316
317
class ImgRenderer(CmdRenderer):
    """Renderer for images: fetches the file into the working directory and
    fills in the command's file name, width and height parameters."""

    def parms(self):
        """Reserve two parameters ahead of the trailing one (three in total)."""
        return ["", ""]

    def render(self, element, ctx):
        """Render ``element`` as an image command.

        The image is stored as ``f<N>.png`` where N is the counter kept in
        ``ctx.img``.  When the stored file cannot be opened as an image, the
        command is removed from the result.
        """
        root = super(ImgRenderer, self).render(element, ctx)
        source = element.get('src')
        index = getattr(ctx, 'img', 0)
        ctx.img = index + 1
        image_name = 'f%d.png' % index
        ctx.format.add_file(ctx, image_name, source, image=True)
        command = root[0]
        command[0].text = image_name
        try:
            size = Image.open(ctx.format.get_file(ctx, image_name)).size
        except IOError:  # not an image
            del root[0]
            return root
        # Fixed 15cm width; the height keeps the image's aspect ratio.
        width, height = size[0], size[1]
        command[1].text = '15cm'
        command[2].text = '%fcm' % (15.0 * height / width)
        return root
337
# Divs labelled 'img' are rendered through the ``insertimage`` command.
PdfFormat.renderers.register(core.Div, 'img', ImgRenderer('insertimage'))
339
340
class VideoRenderer(CmdRenderer):
    """Renderer for videos: replaces the content with a link to YouTube."""

    def render(self, element, ctx):
        """Render ``element`` as a command whose first parameter holds an
        ``href`` to the video given by the ``videoid`` attribute."""
        root = super(VideoRenderer, self).render(element, ctx)
        video_id = element.attrib.get('videoid')
        url = 'https://www.youtube.com/watch?v=%s' % video_id
        slot = root[0][0]
        # Drop any text already in the slot; only the link remains.
        slot.text = None
        slot.append(texml_cmd('href', url, url))
        return root
349
# Videos are rendered as a paragraph containing a link.
PdfFormat.renderers.register(core.Div, 'video', VideoRenderer('par'))


# Definitions and list structures.
PdfFormat.renderers.register(core.Div, 'defined', CmdRenderer('textbf'))
PdfFormat.renderers.register(core.Div, 'item', CmdRenderer('item'))
PdfFormat.renderers.register(core.Span, 'item', CmdRenderer('item'))
PdfFormat.renderers.register(core.Div, 'list', EnvRenderer('itemize'))
PdfFormat.renderers.register(core.Div, 'list.enum', EnvRenderer('enumerate'))


# Inline spans: plain pass-through by default, TeX font commands for the
# labelled variants.
PdfFormat.renderers.register(core.Span, None, TreeRenderer())
PdfFormat.renderers.register(core.Span, 'cite', CmdRenderer('emph'))
PdfFormat.renderers.register(core.Span, 'cite.code', CmdRenderer('texttt'))
PdfFormat.renderers.register(core.Span, 'emp', CmdRenderer('textbf'))
PdfFormat.renderers.register(core.Span, 'emph', CmdRenderer('emph'))
365
366
class SpanUri(CmdRenderer):
    """Renderer for a bare URI: the element's text becomes the link target
    held in the command's first parameter."""

    def parms(self):
        """Reserve one parameter (the target) ahead of the content parameter."""
        return [""]

    def render(self, element, ctx):
        """Render ``element`` and store its text as the link target.

        A ``file://`` prefix is replaced with ``ctx.files_path``.
        """
        root = super(SpanUri, self).render(element, ctx)
        # An element without text has ``text`` set to None; treat it as an
        # empty target instead of failing on ``None.startswith``.
        src = element.text or ''
        if src.startswith('file://'):
            src = ctx.files_path + src[7:]
        root[0][0].text = src
        return root
# Spans labelled 'uri' are rendered through the ``href`` command.
PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href'))
379
380
class SpanLink(CmdRenderer):
    """Renderer for links: the ``href`` attribute becomes the link target
    held in the command's first parameter."""

    def parms(self):
        """Reserve one parameter (the target) ahead of the content parameter."""
        return [""]

    def render(self, element, ctx):
        """Render ``element`` and store its ``href`` attribute (empty string
        when absent) as the link target.

        A ``file://`` prefix is replaced with ``ctx.files_path``.
        """
        root = super(SpanLink, self).render(element, ctx)
        target = element.attrib.get('href', '')
        local_prefix = 'file://'
        if target.startswith(local_prefix):
            target = ctx.files_path + target[7:]
        root[0][0].text = target
        return root
# Spans labelled 'link' are rendered through the ``href`` command.
PdfFormat.renderers.register(core.Span, 'link', SpanLink('href'))


# Asides: plain pass-through by default, a dedicated command for editorial
# notes, and the Silent renderer (imported from the html format) for comments.
PdfFormat.renderers.register(core.Aside, None, TreeRenderer())
PdfFormat.renderers.register(core.Aside, 'editorial', CmdRenderer('editorialpage'))
PdfFormat.renderers.register(core.Aside, 'comment', Silent())