1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
8 from subprocess import call, PIPE
9 from tempfile import NamedTemporaryFile, mkdtemp
10 from lxml import etree
11 from urllib import urlretrieve
12 from StringIO import StringIO
13 from Texml.processor import process
14 from librarian import DCNS, XMLNamespace, BuildError
15 from librarian.formats import Format
16 from librarian.output import OutputFile
17 from librarian.renderers import Register, TreeRenderer
18 from librarian.utils import Context, get_resource
19 from librarian import core
21 from ..html import Silent
# Qualified-name factory for the TeXML namespace: calling it with a local
# name (e.g. ``TexmlNS('cmd')``) yields the tag passed to ``etree.Element`` /
# ``etree.SubElement`` when TeXML nodes are built in this module.
TexmlNS = XMLNamespace('http://getfo.sourceforge.net/texml/ns1')
def texml_cmd(name, *parms, **kwargs):
    """Build a TeXML ``cmd`` element.

    Creates a ``cmd`` element in the TeXML namespace with its ``name``
    attribute set, then appends one ``opt`` child per entry of the ``opts``
    keyword argument, followed by one ``parm`` child per positional
    parameter. Each child carries the given value as its text.

    :param name: value of the ``name`` attribute of the ``cmd`` element.
    :param parms: texts of the ``parm`` children, in order.
    :param opts: (keyword) iterable of texts for the ``opt`` children;
        omitted or ``None`` means no ``opt`` children.
    :return: the newly created ``cmd`` element.
    """
    cmd = etree.Element(TexmlNS('cmd'), name=name)
    # ``or ()`` lets callers pass ``opts=None`` as well as omitting it.
    for opt in kwargs.get('opts') or ():
        etree.SubElement(cmd, TexmlNS('opt')).text = opt
    for parm in parms:
        etree.SubElement(cmd, TexmlNS('parm')).text = parm
    # Callers append the result to other elements and index into its
    # children, so the element itself must be handed back.
    return cmd
36 class PdfFormat(Format):
40 style = get_resource('formats/pdf/res/default.sty')
43 get_resource('formats/pdf/res/coverimage.sty'),
44 get_resource('formats/pdf/res/insertimage.sty'),
47 renderers = Register()
49 def retrieve_file(self, url, save_as):
53 def add_file(self, ctx, filename, url=None, path=None, image=False):
54 from subprocess import call
55 if not url and not path:
56 raise BuildError('No URL or path for image')
57 save_as = os.path.join(ctx.workdir, filename)
59 ext = path.rsplit('.', 1)[-1]
62 call(['convert', path, save_as])
64 # JPEGs with bad density will break LaTeX with 'Dimension too large'.
65 call(['convert', '-units', 'PixelsPerInch', path, '-density', '300', save_as + '_.' + ext])
66 shutil.move(save_as + '_.' + ext, save_as)
68 shutil.copy(path, save_as)
69 elif not self.retrieve_file(url, save_as):
70 if url.startswith('file://'):
71 url = ctx.files_path + url[7:]
73 if url.startswith('/'):
74 url = 'http://milpeer.eu' + url
77 raise BuildError('Linked file without extension: %s' % url)
78 ext = url.rsplit('.', 1)[-1]
80 urlretrieve(url, save_as + '_.' + ext)
82 call(['convert', save_as + '_.' + ext, save_as])
84 # JPEGs with bad density will break LaTeX with 'Dimension too large'.
85 r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300',
86 save_as + '_2.' + ext])
88 shutil.move(save_as + '_.' + ext, save_as)
90 shutil.move(save_as + '_2.' + ext, save_as)
92 urlretrieve(url, save_as)
def get_file(self, ctx, filename):
    """Return the path of ``filename`` inside the build working directory.

    The result is ``ctx.workdir`` joined with ``filename``; the file system
    is not consulted.
    """
    workdir = ctx.workdir
    return os.path.join(workdir, filename)
97 def get_texml(self, build_ctx):
98 t = etree.Element(TexmlNS('TeXML'))
100 self.add_file(build_ctx, 'wl.cls', path=get_resource('formats/pdf/res/wl.cls'))
101 t.append(texml_cmd("documentclass", "wl"))
104 self.add_file(build_ctx, 'style.sty', path=self.style)
105 t.append(texml_cmd("usepackage", "style"))
106 t.append(texml_cmd("usepackage", "hyphenat"))
109 for i, package in enumerate(self.local_packages):
110 self.add_file(build_ctx, "librarianlocalpackage%s.sty" % i, path=package)
111 t.append(texml_cmd("usepackage", "librarianlocalpackage%s" % i))
113 author = ", ". join(self.doc.meta.get(DCNS('creator')) or '')
114 title = self.doc.meta.title()
115 t.append(texml_cmd("author", author))
116 t.append(texml_cmd("title", title))
118 doc = etree.SubElement(t, TexmlNS('env'), name="document")
119 doc.append(texml_cmd("thispagestyle", "empty"))
123 cover_url = self.doc.meta.get_one(DCNS('relation.coverimage.url'))
125 self.add_file(build_ctx, 'cover.png', cover_url, image=True)
127 img = Image.open(self.get_file(build_ctx, 'cover.png'))
130 if size[1] > size[0]:
131 img = img.crop((0, 0, size[0], size[0]))
132 img.save(self.get_file(build_ctx, 'cover.png'), format=img.format, quality=90)
135 # TODO: hardcoded paper size here
136 height = 210.0 * size[1] / size[0]
137 doc.append(texml_cmd("makecover", "%fmm" % height))
139 doc.append(texml_cmd("vfill*"))
142 grp = etree.SubElement(doc, 'group')
143 grp.append(texml_cmd("raggedright"))
144 grp.append(texml_cmd("vfill"))
146 p = texml_cmd("par", "")
148 p[0].append(texml_cmd("Large"))
149 p[0].append(texml_cmd("noindent"))
150 p[0].append(texml_cmd("nohyphens", author))
151 p[0].append(texml_cmd("vspace", "1em"))
152 # p[0][-1].tail = author
154 p = texml_cmd("par", "")
156 p[0].append(texml_cmd("Huge"))
157 p[0].append(texml_cmd("noindent"))
158 p[0].append(texml_cmd("nohyphens", title))
159 # p[0][-1].tail = title
160 doc.append(texml_cmd("vfill"))
161 doc.append(texml_cmd("vfill"))
163 # IOFile probably would be better
164 cover_logo_url = getattr(build_ctx, 'cover_logo', None)
167 # cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png'
169 self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True)
170 size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size
171 p = texml_cmd("par", "")
173 p[0].append(texml_cmd("noindent"))
174 p[0].append(texml_cmd("insertimage", 'coverlogo.png', "%fcm" % (1.0 * size[0] / size[1]), "1cm"))
177 doc.append(texml_cmd("clearpage"))
179 ctx = Context(build_ctx, format=self, img=1)
180 doc.extend(self.render(self.doc.edoc.getroot(), ctx))
182 # Redakcyjna na końcu.
183 doc.append(texml_cmd("clearpage"))
185 doc.append(texml_cmd("section*", "Information about the resource"))
186 doc.append(texml_cmd("vspace", "1em"))
189 ('Publisher: ', DCNS('publisher')),
190 ('Rights: ', DCNS('rights')),
191 ('Intended audience: ', DCNS('audience')),
192 ('', DCNS('description'))):
193 v = self.doc.meta.get_one(f)
195 e = texml_cmd("par", "")
196 e[0].append(texml_cmd("noindent"))
197 e[0][0].tail = "%s%s" % (m, v)
199 doc.append(texml_cmd("vspace", "1em"))
201 e = texml_cmd("par", "")
202 e[0].append(texml_cmd("noindent"))
203 e[0][0].tail = "Resource prepared using "
204 e[0].append(texml_cmd("href", "http://milpeer.eu", "MIL/PEER"))
205 e[0][-1].tail = " editing platform. "
208 source_url = getattr(build_ctx, 'source_url', None)
209 # source_url = 'http://milpeer.mdrn.pl/documents/27/'
211 e = texml_cmd("par", "")
213 e[0].append(texml_cmd("noindent"))
214 e[0][0].tail = "Source available at "
215 e[0].append(texml_cmd("href", source_url, source_url))
def get_tex_dir(self, ctx):
    """Generate the LaTeX source in a fresh temporary directory.

    Creates a temporary directory, stores its path on ``ctx.workdir``,
    builds the TeXML tree with ``get_texml`` (which also places the files it
    needs in that directory through ``add_file``) and converts the tree
    into ``doc.tex`` there.

    :param ctx: build context; its ``workdir`` attribute is overwritten.
    :return: path of the directory that now contains ``doc.tex``.
    """
    ctx.workdir = mkdtemp(suffix='-wl2pdf')
    texml = self.get_texml(ctx)
    tex_path = os.path.join(ctx.workdir, 'doc.tex')
    with open(tex_path, 'w') as fout:
        # The TeXML processor reads the serialized tree from a file-like
        # object; 'utf-8' is handed over as its encoding argument.
        process(StringIO(etree.tostring(texml)), fout, 'utf-8')
    # ``build`` joins the returned value with 'doc.tex' and 'doc.pdf', so
    # the working directory is what has to be returned here.
    return ctx.workdir
234 def build(self, ctx=None, verbose=False):
235 temp = self.get_tex_dir(ctx)
236 tex_path = os.path.join(temp, 'doc.tex')
244 for i in range(self.tex_passes):
245 p = call(['xelatex', tex_path])
247 for i in range(self.tex_passes):
248 p = call(['xelatex', '-interaction=batchmode', tex_path],
249 stdout=PIPE, stderr=PIPE)
251 # raise ParseError("Error parsing .tex file: %s" % tex_path)
252 raise RuntimeError("Error parsing .tex file: %s" % tex_path)
257 output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
258 pdf_path = os.path.join(temp, 'doc.pdf')
259 shutil.move(pdf_path, output_file.name)
261 os.system("ls -l " + output_file.name)
262 return OutputFile.from_filename(output_file.name)
def render(self, element, ctx):
    """Render ``element`` with the renderer registered for it.

    The renderer is looked up with ``self.renderers.get_for(element)`` and
    the result of its ``render(element, ctx)`` call is returned unchanged.
    """
    renderer = self.renderers.get_for(element)
    return renderer.render(element, ctx)
268 class CmdRenderer(TreeRenderer):
273 root = etree.Element(self.root_name)
274 root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
279 class EnvRenderer(TreeRenderer):
281 root = etree.Element(self.root_name)
282 inner = etree.SubElement(root, 'env', name=self.tag_name)
286 class GroupRenderer(CmdRenderer):
288 root = etree.Element(self.root_name)
289 inner = etree.SubElement(root, 'group')
291 inner.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
295 class SectionRenderer(CmdRenderer):
296 def subcontext(self, element, ctx):
298 return Context(ctx, toc_level=getattr(ctx, 'toc_level', 1) + 2)
301 root = etree.Element(self.root_name)
302 root.append(texml_cmd('pagebreak', opts=['1']))
303 root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# Default renderers (registered under the ``None`` key) for the structural
# element types: sections, headers and divs.
for _element_type, _renderer in (
        (core.Section, SectionRenderer('par')),
        (core.Header, CmdRenderer('section*')),
        (core.Div, CmdRenderer('par')),
):
    PdfFormat.renderers.register(_element_type, None, _renderer)
315 class ImgRenderer(CmdRenderer):
319 def render(self, element, ctx):
320 root = super(ImgRenderer, self).render(element, ctx)
321 url = element.get('src')
322 nr = getattr(ctx, 'img', 0)
324 ctx.format.add_file(ctx, 'f%d.png' % nr, url, image=True)
325 root[0][0].text = 'f%d.png' % nr
327 size = Image.open(ctx.format.get_file(ctx, 'f%d.png' % nr)).size
328 except IOError: # not an image
331 root[0][1].text = '15cm'
332 root[0][2].text = '%fcm' % (15.0 * size[1] / size[0])
# Divs registered under the 'img' key are handled by ImgRenderer, created
# here with the ``insertimage`` command name.
# NOTE(review): the second argument presumably matches the element's
# class/variant; confirm against Register.
PdfFormat.renderers.register(core.Div, 'img', ImgRenderer('insertimage'))
class VideoRenderer(CmdRenderer):
    """Render a video element as a hyperlink to its YouTube watch page."""

    def render(self, element, ctx):
        """Return the rendered tree with a link to the video inside it.

        The tree comes from the parent ``render``; the YouTube URL is built
        from the element's ``videoid`` attribute and inserted as an ``href``
        command whose target and label are both that URL.

        :param element: source element carrying the ``videoid`` attribute.
        :param ctx: rendering context, passed through to the parent class.
        :return: the tree produced by the parent ``render``, modified.
        """
        root = super(VideoRenderer, self).render(element, ctx)
        url = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid')
        link = texml_cmd('href', url, url)
        # root[0][0] is presumably the first ``parm`` of the command built by
        # CmdRenderer (confirm); clear its text and put the link there.
        root[0][0].text = None
        root[0][0].append(link)
        return root
# Renderers for video, definition and list divs, followed by the inline
# (span) variants. Entries are registered in the order listed.
for _element_type, _key, _renderer in (
        (core.Div, 'video', VideoRenderer('par')),
        (core.Div, 'defined', CmdRenderer('textbf')),
        (core.Div, 'item', CmdRenderer('item')),
        (core.Span, 'item', CmdRenderer('item')),
        (core.Div, 'list', EnvRenderer('itemize')),
        (core.Div, 'list.enum', EnvRenderer('enumerate')),
        (core.Span, None, TreeRenderer()),
        (core.Span, 'cite', CmdRenderer('emph')),
        (core.Span, 'cite.code', CmdRenderer('texttt')),
        (core.Span, 'emp', CmdRenderer('textbf')),
        (core.Span, 'emph', CmdRenderer('emph')),
):
    PdfFormat.renderers.register(_element_type, _key, _renderer)
364 class SpanUri(CmdRenderer):
368 def render(self, element, ctx):
369 root = super(SpanUri, self).render(element, ctx)
371 if src.startswith('file://'):
372 src = ctx.files_path + src[7:]
373 root[0][0].text = src
# Spans registered under the 'uri' key are handled by SpanUri, created here
# with the ``href`` command name.
PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href'))
378 class SpanLink(CmdRenderer):
def render(self, element, ctx):
    """Return the rendered tree with the link target filled in.

    The target is the element's ``href`` attribute (empty string when the
    attribute is absent). A leading ``file://`` is replaced with
    ``ctx.files_path`` before the target is stored.

    :param element: source element carrying the ``href`` attribute.
    :param ctx: rendering context; ``files_path`` is read for file links.
    :return: the tree produced by the parent ``render``, modified.
    """
    root = super(SpanLink, self).render(element, ctx)
    src = element.attrib.get('href', '')
    if src.startswith('file://'):
        src = ctx.files_path + src[len('file://'):]
    # root[0][0] is presumably the first ``parm`` of the command built by
    # CmdRenderer, i.e. the link target (confirm).
    root[0][0].text = src
    return root
# Link spans and asides. Asides registered as 'comment' go to ``Silent``.
for _element_type, _key, _renderer in (
        (core.Span, 'link', SpanLink('href')),
        (core.Aside, None, TreeRenderer()),
        (core.Aside, 'editorial', CmdRenderer('editorialpage')),
        (core.Aside, 'comment', Silent()),
):
    PdfFormat.renderers.register(_element_type, _key, _renderer)