fix video
[librarian.git] / librarian / formats / pdf / __init__.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import os
7 import shutil
8 from subprocess import call, PIPE
9 from tempfile import NamedTemporaryFile, mkdtemp
10 from lxml import etree
11 from urllib import urlretrieve
12 from StringIO import StringIO
13 from Texml.processor import process
14 from librarian import DCNS, XMLNamespace, BuildError, get_provider
15 from librarian.formats import Format
16 from librarian.output import OutputFile
17 from librarian.renderers import Register, TreeRenderer
18 from librarian.utils import Context, get_resource
19 from librarian import core
20 from PIL import Image
21 from ..html import Silent
22
23
24 TexmlNS = XMLNamespace('http://getfo.sourceforge.net/texml/ns1')
25
26
def texml_cmd(name, *parms, **kwargs):
    """Build a TeXML ``cmd`` element called *name*.

    Each item of the optional ``opts`` keyword argument becomes an ``opt``
    child and each positional *parms* item becomes a ``parm`` child, with
    the item as the child's text.  ``opt`` children come first.
    """
    command = etree.Element(TexmlNS('cmd'), name=name)
    children = [('opt', text) for text in kwargs.get('opts', [])]
    children += [('parm', text) for text in parms]
    for tag, text in children:
        etree.SubElement(command, TexmlNS(tag)).text = text
    return command
34
35
class PdfFormat(Format):
    """Format building a PDF from the document through TeXML and XeLaTeX."""
    format_name = 'PDF'
    format_ext = 'pdf'
    # Number of times XeLaTeX is run over the generated source.
    tex_passes = 1
    # Style file copied into the work directory as ``style.sty``.
    style = get_resource('formats/pdf/res/default.sty')

    # Packages copied into the work directory as
    # ``librarianlocalpackage<N>.sty`` and loaded with ``\usepackage``.
    local_packages = [
        get_resource('formats/pdf/res/coverimage.sty'),
        get_resource('formats/pdf/res/insertimage.sty'),
    ]

    renderers = Register()

    def retrieve_file(self, url, save_as):
        """Hook for storing *url* as *save_as* without downloading it.

        ``add_file`` downloads the URL itself whenever this returns a false
        value.  This implementation stores nothing and returns ``False``.
        """
        # TODO: local scheme
        return False

    def add_file(self, ctx, filename, url=None, path=None, image=False):
        """Put a file into the work directory (``ctx.workdir``) as *filename*.

        The file comes from local *path* when one is given, otherwise from
        *url*.  URLs starting with ``file://`` are rewritten against
        ``ctx.files_path`` and URLs starting with ``/`` are prefixed with
        the ``http://milpeer.eu`` host before downloading.  With
        ``image=True`` the file is passed through ImageMagick ``convert``.

        Raises:
            BuildError: if neither *url* nor *path* is given, or the URL to
                download contains no ``.`` to take an extension from.
        """
        if not url and not path:
            raise BuildError('No URL or path for image')
        save_as = os.path.join(ctx.workdir, filename)
        # An empty path falls through to the URL instead of being copied.
        if path:
            if image:
                self._store_image(path, save_as, path.rsplit('.', 1)[-1])
            else:
                shutil.copy(path, save_as)
        elif not self.retrieve_file(url, save_as):
            if url.startswith('file://'):
                url = ctx.files_path + url[7:]

            if url.startswith('/'):
                url = 'http://milpeer.eu' + url

            if '.' not in url:
                raise BuildError('Linked file without extension: %s' % url)
            if image:
                ext = url.rsplit('.', 1)[-1]
                downloaded = save_as + '_.' + ext
                urlretrieve(url, downloaded)
                self._store_image(downloaded, save_as, ext)
            else:
                urlretrieve(url, save_as)

    def _store_image(self, source, save_as, ext):
        """Write the image file *source* to *save_as* using ``convert``.

        GIFs are converted straight to *save_as*.  Other files get their
        density set to 300 pixels per inch; when that conversion fails the
        unconverted *source* is stored instead.  *source* is never removed.
        """
        if ext == 'gif':
            call(['convert', source, save_as])
            return
        # JPEGs with bad density will break LaTeX with 'Dimension too large'.
        converted = save_as + '_2.' + ext
        failed = call(['convert', '-units', 'PixelsPerInch', source,
                       '-density', '300', converted])
        if failed:
            shutil.copy(source, save_as)
        else:
            shutil.move(converted, save_as)

    def get_file(self, ctx, filename):
        """Return the path of *filename* inside the work directory."""
        return os.path.join(ctx.workdir, filename)

    def get_texml(self, build_ctx):
        """Build and return the TeXML tree of the whole document.

        Copies the document class, the style and the local packages into
        the work directory, then emits the title field, the optional cover
        logo, the rendered document body and the closing information
        section.
        """
        t = etree.Element(TexmlNS('TeXML'))

        self.add_file(build_ctx, 'wl.cls', path=get_resource('formats/pdf/res/wl.cls'))
        t.append(texml_cmd("documentclass", "wl"))

        # global packages
        self.add_file(build_ctx, 'style.sty', path=self.style)
        t.append(texml_cmd("usepackage", "style"))
        t.append(texml_cmd("usepackage", "hyphenat"))

        # local packages
        for i, package in enumerate(self.local_packages):
            self.add_file(build_ctx, "librarianlocalpackage%s.sty" % i, path=package)
            t.append(texml_cmd("usepackage", "librarianlocalpackage%s" % i))

        author = ", ".join(self.doc.meta.get(DCNS('creator')) or [])
        title = self.doc.meta.title()
        t.append(texml_cmd("author", author))
        t.append(texml_cmd("title", title))
        t.append(texml_cmd("organization", build_ctx.organization))

        doc = etree.SubElement(t, TexmlNS('env'), name="document")

        # Sizes!
        title_field = texml_cmd("titlefield", "")
        doc.append(title_field)
        # The single (empty) parameter of \titlefield collects the content.
        grp = title_field[0]
        grp.append(texml_cmd("raggedright"))
        grp.append(texml_cmd("vfill"))
        if author:
            p = texml_cmd("par", "")
            grp.append(p)
            p[0].append(texml_cmd("Large"))
            p[0].append(texml_cmd("noindent"))
            p[0].append(texml_cmd("nohyphens", author))
            p[0].append(texml_cmd("vspace", "1em"))
        if title:
            p = texml_cmd("par", "")
            grp.append(p)
            p[0].append(texml_cmd("Huge"))
            p[0].append(texml_cmd("noindent"))
            p[0].append(texml_cmd("nohyphens", title))

        # IOFile probably would be better
        cover_logo_url = getattr(build_ctx, 'cover_logo', None)
        # TODO: convert
        if cover_logo_url:
            self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True)
            size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size
            # The logo is 2cm high; the width keeps the aspect ratio.
            doc.append(texml_cmd("toplogo", 'coverlogo.png', "%fcm" % (2.0 * size[0] / size[1]), "2cm"))

        doc.append(texml_cmd("vspace", "2em"))

        ctx = Context(build_ctx, format=self, img=1)
        root = self.doc.edoc.getroot()
        # NOTE(review): this drops the second child of the document root in
        # place, so the source tree itself is modified -- presumably the
        # metadata element; confirm before building the same doc twice.
        root.remove(root[1])
        doc.extend(self.render(root, ctx))

        # Editorial information goes at the end.
        doc.append(texml_cmd("section*", "Information about the resource"))
        doc.append(texml_cmd("vspace", "1em"))

        for m, f, multiple in (
                ('Publisher: ', DCNS('publisher'), False),
                ('Rights: ', DCNS('rights'), False),
                ('', DCNS('description'), False)):
            if multiple:
                v = ', '.join(self.doc.meta.get(f))
            else:
                v = self.doc.meta.get_one(f)
            if v:
                e = texml_cmd("par", "")
                e[0].append(texml_cmd("noindent"))
                # The text follows \noindent inside the paragraph.
                e[0][0].tail = "%s%s" % (m, v)
                doc.append(e)
                doc.append(texml_cmd("vspace", "1em"))

        e = texml_cmd("par", "")
        e[0].append(texml_cmd("noindent"))
        e[0][0].tail = "Resource prepared using "
        e[0].append(texml_cmd("href", "http://milpeer.eu", "MIL/PEER"))
        e[0][-1].tail = " editing platform. "
        doc.append(e)

        source_url = getattr(build_ctx, 'source_url', None)
        if source_url:
            e = texml_cmd("par", "")
            doc.append(e)
            e[0].append(texml_cmd("noindent"))
            e[0][0].tail = "Source available at "
            e[0].append(texml_cmd("href", source_url, source_url))

        return t

    def get_tex_dir(self, ctx):
        """Create the work directory and write ``doc.tex`` into it.

        The new temporary directory is stored as ``ctx.workdir`` and its
        path is returned.
        """
        ctx.workdir = mkdtemp('-wl2pdf')
        texml = self.get_texml(ctx)
        tex_path = os.path.join(ctx.workdir, 'doc.tex')
        with open(tex_path, 'w') as fout:
            process(StringIO(etree.tostring(texml)), fout, 'utf-8')
        return ctx.workdir

    def build(self, ctx=None, verbose=False):
        """Run XeLaTeX over the document and return the PDF as OutputFile.

        XeLaTeX is run ``tex_passes`` times inside the work directory.  With
        *verbose* false it runs in batch mode and its output is discarded.
        On success the PDF is moved to a new temporary file and the work
        directory is removed; on failure the work directory is left in
        place.

        Raises:
            RuntimeError: if the last XeLaTeX pass exits with a non-zero
                status.
        """
        # NOTE(review): get_tex_dir() assigns ctx.workdir, so the default
        # ctx=None cannot work as written -- confirm callers always pass one.
        temp = self.get_tex_dir(ctx)
        tex_path = os.path.join(temp, 'doc.tex')

        command = ['xelatex', tex_path]
        if not verbose:
            command.insert(1, '-interaction=batchmode')

        # Stays 0 (success) when tex_passes is 0.
        status = 0
        # XeLaTeX is started with cwd=temp instead of os.chdir(), so the
        # working directory of this process is never changed.
        if verbose:
            for i in range(self.tex_passes):
                status = call(command, cwd=temp)
        else:
            # Output goes to the null device; unread PIPEs passed to call()
            # can fill up and block the child process.
            with open(os.devnull, 'w') as devnull:
                for i in range(self.tex_passes):
                    status = call(command, cwd=temp,
                                  stdout=devnull, stderr=devnull)
        if status:
            raise RuntimeError("Error parsing .tex file: %s" % tex_path)

        output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
        # Only the reserved name is needed; release the handle right away.
        output_file.close()
        pdf_path = os.path.join(temp, 'doc.pdf')
        shutil.move(pdf_path, output_file.name)
        shutil.rmtree(temp)
        return OutputFile.from_filename(output_file.name)

    def render(self, element, ctx):
        """Render *element* with the renderer registered for it."""
        return self.renderers.get_for(element).render(element, ctx)
244
245
class CmdRenderer(TreeRenderer):
    """Renderer whose container is a TeXML command named ``tag_name``."""

    def parms(self):
        """Return the texts of the parameters preceding the content one."""
        return []

    def container(self):
        """Return ``(root, inner)`` for a freshly built command.

        ``root`` is a ``root_name`` element holding the command; ``inner``
        is the command's last child, the empty parameter appended after
        those from ``parms()``.
        """
        wrapper = etree.Element(self.root_name)
        arguments = self.parms() + [""]
        command = texml_cmd(self.tag_name, *arguments)
        wrapper.append(command)
        return wrapper, command[-1]
255
256
class EnvRenderer(TreeRenderer):
    """Renderer whose container is an ``env`` element named ``tag_name``."""

    def container(self):
        """Return ``(root, inner)`` where ``inner`` is the ``env`` element.

        NOTE(review): ``env`` is created without the TexmlNS namespace,
        unlike the elements built by ``texml_cmd`` -- confirm it is intended.
        """
        wrapper = etree.Element(self.root_name)
        environment = etree.Element('env', name=self.tag_name)
        wrapper.append(environment)
        return wrapper, environment
262
263
class GroupRenderer(CmdRenderer):
    """Renderer whose container is a ``group`` element."""

    def container(self):
        """Return ``(root, inner)`` where ``inner`` is the ``group`` element.

        When ``tag_name`` is set, the group starts with that command, built
        from ``parms()`` plus one empty trailing parameter.
        """
        wrapper = etree.Element(self.root_name)
        group = etree.Element('group')
        wrapper.append(group)
        if not self.tag_name:
            return wrapper, group
        arguments = self.parms() + [""]
        group.append(texml_cmd(self.tag_name, *arguments))
        return wrapper, group
271
272
class SectionRenderer(CmdRenderer):
    """Command renderer that puts a ``pagebreak`` command before its own."""

    def subcontext(self, element, ctx):
        """Return a child context with ``toc_level`` raised by 2.

        A context without ``toc_level`` is treated as being at level 1.
        """
        # NOTE(review): the original author was unsure ("here?") whether
        # this is the right place to bump the level.
        return Context(ctx, toc_level=getattr(ctx, 'toc_level', 1) + 2)

    def container(self):
        """Return ``(root, inner)`` with ``pagebreak[1]`` before the command.

        ``inner`` is the trailing empty parameter of the command, the same
        slot ``CmdRenderer.container`` uses, so content still lands in the
        right parameter if ``parms()`` is overridden to return more.
        """
        root = etree.Element(self.root_name)
        root.append(texml_cmd('pagebreak', opts=['1']))
        command = texml_cmd(self.tag_name, *(self.parms() + [""]))
        root.append(command)
        return root, command[-1]
284
# Renderers for Section, Header and Div registered with None as the
# second argument.
_register = PdfFormat.renderers.register
_register(core.Section, None, SectionRenderer('par'))
# TODO: heading levels
_register(core.Header, None, CmdRenderer('section*'))
_register(core.Div, None, CmdRenderer('par'))
del _register
291
292
class ImgRenderer(CmdRenderer):
    """Renderer emitting a command with file name, width and height."""

    def parms(self):
        """Reserve two parameters in front of the trailing one."""
        return ["", ""]

    def render(self, element, ctx):
        """Fetch the image from the ``src`` attribute and fill the command.

        The image is stored as ``f<N>.png`` where N is the counter kept in
        ``ctx.img``.  The command gets the file name, a width of 15cm and a
        height keeping the aspect ratio.  When the stored file cannot be
        opened as an image, the command is removed from the result.
        """
        rendered = super(ImgRenderer, self).render(element, ctx)
        index = getattr(ctx, 'img', 0)
        ctx.img = index + 1
        filename = 'f%d.png' % index
        ctx.format.add_file(ctx, filename, element.get('src'), image=True)
        command = rendered[0]
        command[0].text = filename
        try:
            width, height = Image.open(ctx.format.get_file(ctx, filename)).size
        except IOError:  # not an image
            del rendered[0]
            return rendered
        command[1].text = '15cm'
        command[2].text = '%fcm' % (15.0 * height / width)
        return rendered

PdfFormat.renderers.register(core.Div, 'img', ImgRenderer('insertimage'))
314
315
class VideoRenderer(CmdRenderer):
    """Renderer replacing a video with a link to it."""

    def render(self, element, ctx):
        """Put an ``href`` command pointing at the video into the output.

        The URL is the provider's ``url`` template filled with the
        ``videoid`` attribute (empty when missing); it is used both as the
        link target and as the link text.
        """
        rendered = super(VideoRenderer, self).render(element, ctx)
        attributes = element.attrib
        template = get_provider(attributes.get('provider'))['url']
        url = template % attributes.get('videoid', '')
        slot = rendered[0][0]
        slot.text = None
        slot.append(texml_cmd('href', url, url))
        return rendered

PdfFormat.renderers.register(core.Div, 'video', VideoRenderer('par'))
326
327
# Renderers for named Div and Span variants; the calls are kept in their
# original order.
_register = PdfFormat.renderers.register

_register(core.Div, 'defined', CmdRenderer('textbf'))
_register(core.Div, 'item', CmdRenderer('item'))
_register(core.Span, 'item', CmdRenderer('item'))
_register(core.Div, 'list', EnvRenderer('itemize'))
_register(core.Div, 'list.enum', EnvRenderer('enumerate'))

_register(core.Span, None, TreeRenderer())
_register(core.Span, 'cite', CmdRenderer('emph'))
_register(core.Span, 'cite.code', CmdRenderer('texttt'))
_register(core.Span, 'emp', CmdRenderer('textbf'))
_register(core.Span, 'emph', CmdRenderer('emph'))

del _register
340
341
class SpanUri(CmdRenderer):
    """Renderer turning the text of an element into a link target."""

    def parms(self):
        """Reserve one parameter (the target) before the trailing one."""
        return [""]

    def render(self, element, ctx):
        """Set the first command parameter to the element's text.

        A ``file://`` prefix is replaced with ``ctx.files_path``.  An
        element without text yields an empty target instead of failing.
        """
        root = super(SpanUri, self).render(element, ctx)
        # element.text is None for an empty element.
        src = element.text or ''
        if src.startswith('file://'):
            src = ctx.files_path + src[7:]
        root[0][0].text = src
        return root
PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href'))
354
355
class SpanLink(CmdRenderer):
    """Renderer taking the link target from the ``href`` attribute."""

    def parms(self):
        """Reserve one parameter (the target) before the trailing one."""
        return [""]

    def render(self, element, ctx):
        """Set the first command parameter to the ``href`` attribute.

        A missing attribute gives an empty target; a ``file://`` prefix is
        replaced with ``ctx.files_path``.
        """
        rendered = super(SpanLink, self).render(element, ctx)
        target = element.attrib.get('href', '')
        prefix = 'file://'
        if target.startswith(prefix):
            target = ctx.files_path + target[len(prefix):]
        rendered[0][0].text = target
        return rendered
PdfFormat.renderers.register(core.Span, 'link', SpanLink('href'))
368
369
# Renderers for Aside elements.
_register = PdfFormat.renderers.register
_register(core.Aside, None, TreeRenderer())
_register(core.Aside, 'editorial', CmdRenderer('editorialpage'))
_register(core.Aside, 'comment', Silent())
del _register