1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
8 from subprocess import call, PIPE
9 from tempfile import NamedTemporaryFile, mkdtemp
10 from lxml import etree
11 from urllib import urlretrieve
12 from StringIO import StringIO
13 from Texml.processor import process
14 from librarian import DCNS, XMLNamespace, BuildError
15 from librarian.formats import Format
16 from librarian.output import OutputFile
17 from librarian.renderers import Register, TreeRenderer
18 from librarian.utils import Context, get_resource
19 from librarian import core
21 from ..html import Silent
# Namespace helper for TeXML element names; it is called below as TexmlNS('cmd'),
# TexmlNS('opt'), TexmlNS('parm'), TexmlNS('env') and TexmlNS('TeXML') to build tags.
24 TexmlNS = XMLNamespace('http://getfo.sourceforge.net/texml/ns1')
def texml_cmd(name, *parms, **kwargs):
    """Build a TeXML ``cmd`` element.

    :param name: value of the element's ``name`` attribute (the TeX command).
    :param parms: text for each ``parm`` child, appended in the given order.
    :param kwargs: only ``opts`` is read: an iterable of texts, each of which
        becomes an ``opt`` child placed before the ``parm`` children.
    :returns: the newly created ``cmd`` element.
    """
    cmd = etree.Element(TexmlNS('cmd'), name=name)
    for opt in kwargs.get('opts', []):
        etree.SubElement(cmd, TexmlNS('opt')).text = opt
    # One <parm> child per positional parameter; callers index into these
    # children (e.g. ``p[0]``), so even an empty string creates a child.
    for parm in parms:
        etree.SubElement(cmd, TexmlNS('parm')).text = parm
    return cmd
# PdfFormat: produces a PDF by building a TeXML tree (get_texml), converting it to
# doc.tex inside a temporary work directory (get_tex_dir) and running xelatex (build).
# NOTE(review): this listing omits lines inside the class (some class attributes,
# branch conditions, return statements). The comments below describe only what the
# visible statements do; confirm the gaps against the full file.
36 class PdfFormat(Format):
# Stylesheet resource; get_texml copies it into the work directory as 'style.sty'.
40 style = get_resource('formats/pdf/res/default.sty')
# Additional .sty resources; presumably the items of the `local_packages` list that
# get_texml iterates over (the opening of that literal is not visible here).
43 get_resource('formats/pdf/res/coverimage.sty'),
44 get_resource('formats/pdf/res/insertimage.sty'),
# Registry that render() queries for the renderer matching an element.
47 renderers = Register()
# Hook for fetching `url` into `save_as`. add_file runs its own download logic when
# this returns a falsy value. The body is not visible in this listing.
49 def retrieve_file(self, url, save_as):
# Place a file in ctx.workdir under `filename`, taken from a local `path` or from
# `url`. Raises BuildError when neither is given. The visible branches either copy
# the file as-is or pass it through ImageMagick `convert`; presumably `image`
# selects the latter, but the guarding conditions are not visible here.
53 def add_file(self, ctx, filename, url=None, path=None, image=False):
# NOTE(review): re-imports `call`, which the module already imports at the top.
54 from subprocess import call
55 if not url and not path:
56 raise BuildError('No URL or path for image')
57 save_as = os.path.join(ctx.workdir, filename)
# --- local-path variant ---
# Extension is whatever follows the last dot of the path.
59 ext = path.rsplit('.', 1)[-1]
# Plain conversion straight to the target name.
62 call(['convert', path, save_as])
64 # JPEGs with bad density will break LaTeX with 'Dimension too large'.
# Re-encode at 300 pixels per inch into '<save_as>_.<ext>', then move it into place.
65 call(['convert', '-units', 'PixelsPerInch', path, '-density', '300', save_as + '_.' + ext])
66 shutil.move(save_as + '_.' + ext, save_as)
# Copy without conversion.
68 shutil.copy(path, save_as)
# --- URL variant, used when retrieve_file() reports it did not fetch the file ---
69 elif not self.retrieve_file(url, save_as):
# 'file://' URLs are rewritten relative to ctx.files_path.
70 if url.startswith('file://'):
71 url = ctx.files_path + url[7:]
# Server-relative URLs get a hard-coded host.
73 if url.startswith('/'):
74 url = 'http://milpeer.eu' + url
# NOTE(review): the condition guarding this raise is not visible in this listing.
77 raise BuildError('Linked file without extension: %s' % url)
78 ext = url.rsplit('.', 1)[-1]
# Download to '<save_as>_.<ext>' first so it can be converted afterwards.
80 urlretrieve(url, save_as + '_.' + ext)
82 call(['convert', save_as + '_.' + ext, save_as])
84 # JPEGs with bad density will break LaTeX with 'Dimension too large'.
# `r` is convert's exit status; the converted output goes to '<save_as>_2.<ext>'.
85 r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300',
86 save_as + '_2.' + ext])
# Presumably the raw download is used when convert failed and the converted file
# otherwise -- the condition on `r` is not visible here; confirm.
88 shutil.move(save_as + '_.' + ext, save_as)
90 shutil.move(save_as + '_2.' + ext, save_as)
# Direct download to the final name.
92 urlretrieve(url, save_as)
# Return the path of `filename` inside the work directory.
94 def get_file(self, ctx, filename):
95 return os.path.join(ctx.workdir, filename)
# Build the TeXML tree for the document: preamble, cover/title page, rendered body
# and a closing information section. Also copies the needed class/style/image files
# into the work directory through add_file.
97 def get_texml(self, build_ctx):
98 t = etree.Element(TexmlNS('TeXML'))
# Document class shipped with the package.
100 self.add_file(build_ctx, 'wl.cls', path=get_resource('formats/pdf/res/wl.cls'))
101 t.append(texml_cmd("documentclass", "wl"))
# The format's stylesheet plus the hyphenat package.
104 self.add_file(build_ctx, 'style.sty', path=self.style)
105 t.append(texml_cmd("usepackage", "style"))
106 t.append(texml_cmd("usepackage", "hyphenat"))
# Each local package is copied under a numbered name and loaded by that name.
109 for i, package in enumerate(self.local_packages):
110 self.add_file(build_ctx, "librarianlocalpackage%s.sty" % i, path=package)
111 t.append(texml_cmd("usepackage", "librarianlocalpackage%s" % i))
# All creators joined with ', '; `or ''` turns a falsy result into an empty string.
113 author = ", ". join(self.doc.meta.get(DCNS('creator')) or '')
114 title = self.doc.meta.title()
115 t.append(texml_cmd("author", author))
116 t.append(texml_cmd("title", title))
# Everything below goes inside the `document` environment.
118 doc = etree.SubElement(t, TexmlNS('env'), name="document")
119 doc.append(texml_cmd("thispagestyle", "empty"))
# Cover image, taken from the document metadata.
123 cover_url = self.doc.meta.get_one(DCNS('relation.coverimage.url'))
125 self.add_file(build_ctx, 'cover.png', cover_url, image=True)
127 img = Image.open(self.get_file(build_ctx, 'cover.png'))
# NOTE(review): the statement binding `size` is not visible in this listing.
# Taller-than-wide covers are cropped to a square anchored at the top-left corner
# and written back over the same file.
130 if size[1] > size[0]:
131 img = img.crop((0, 0, size[0], size[0]))
132 img.save(self.get_file(build_ctx, 'cover.png'), format=img.format, quality=90)
135 # TODO: hardcoded paper size here
# Cover height in mm for a 210 mm width, preserving the image's aspect ratio.
136 height = 210.0 * size[1] / size[0]
137 doc.append(texml_cmd("makecover", "%fmm" % height))
# Presumably the no-cover alternative -- the branch keyword is not visible here.
139 doc.append(texml_cmd("vfill*"))
# Title block: author in \Large, title in \Huge.
# NOTE(review): 'group' is created without the TexmlNS wrapper, unlike the 'env'
# element above -- confirm this is intended.
142 grp = etree.SubElement(doc, 'group')
143 grp.append(texml_cmd("raggedright"))
144 grp.append(texml_cmd("vfill"))
# texml_cmd("par", "") yields a command with one empty parm child; p[0] is that
# child and receives the nested commands.
# NOTE(review): where each `p` is attached to the tree is not visible in this listing.
146 p = texml_cmd("par", "")
148 p[0].append(texml_cmd("Large"))
149 p[0].append(texml_cmd("noindent"))
150 p[0].append(texml_cmd("nohyphens", author))
151 p[0].append(texml_cmd("vspace", "1em"))
152 # p[0][-1].tail = author
154 p = texml_cmd("par", "")
156 p[0].append(texml_cmd("Huge"))
157 p[0].append(texml_cmd("noindent"))
158 p[0].append(texml_cmd("nohyphens", title))
159 # p[0][-1].tail = title
160 doc.append(texml_cmd("vfill"))
161 doc.append(texml_cmd("vfill"))
163 # IOFile probably would be better
# Optional logo URL supplied by the caller as an attribute of the build context.
164 cover_logo_url = getattr(build_ctx, 'cover_logo', None)
167 # cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png'
169 self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True)
170 size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size
171 p = texml_cmd("par", "")
173 p[0].append(texml_cmd("noindent"))
# Second parameter is width/height of the logo in cm, third is "1cm"; presumably
# width and height for `insertimage`, i.e. a logo 1 cm tall at its own aspect ratio.
174 p[0].append(texml_cmd("insertimage", 'coverlogo.png', "%fcm" % (1.0 * size[0] / size[1]), "1cm"))
177 doc.append(texml_cmd("clearpage"))
# Render the document body. The child context exposes this format object and an
# image counter starting at 1 (ImgRenderer reads ctx.format and ctx.img).
179 ctx = Context(build_ctx, format=self, img=1)
180 doc.extend(self.render(self.doc.edoc.getroot(), ctx))
182 # Editorial information goes at the end.
183 doc.append(texml_cmd("clearpage"))
185 doc.append(texml_cmd("section*", "Information about the resource"))
186 doc.append(texml_cmd("vspace", "1em"))
# Each entry is (label, metadata field, multiple).
188 for m, f, multiple in (
189 ('Publisher: ', DCNS('publisher'), False),
190 ('Rights: ', DCNS('rights'), False),
191 ('Intended audience: ', DCNS('audience'), True),
192 ('', DCNS('description'), False)):
# Two ways of reading the value: all values joined with ', ', or a single value.
# Presumably chosen by `multiple` -- the condition is not visible here.
194 v = ', '.join(self.doc.meta.get(f))
196 v = self.doc.meta.get_one(f)
198 e = texml_cmd("par", "")
199 e[0].append(texml_cmd("noindent"))
# The label and value are placed as text following the noindent command.
200 e[0][0].tail = "%s%s" % (m, v)
202 doc.append(texml_cmd("vspace", "1em"))
# Credit line linking to the editing platform.
204 e = texml_cmd("par", "")
205 e[0].append(texml_cmd("noindent"))
206 e[0][0].tail = "Resource prepared using "
207 e[0].append(texml_cmd("href", "http://milpeer.eu", "MIL/PEER"))
208 e[0][-1].tail = " editing platform. "
# Optional source link; the URL comes from the build context.
211 source_url = getattr(build_ctx, 'source_url', None)
212 # source_url = 'http://milpeer.mdrn.pl/documents/27/'
214 e = texml_cmd("par", "")
216 e[0].append(texml_cmd("noindent"))
217 e[0][0].tail = "Source available at "
218 e[0].append(texml_cmd("href", source_url, source_url))
# Create a fresh temporary directory (name suffix '-wl2pdf'), store it on
# ctx.workdir, and write the TeXML tree converted to TeX as doc.tex inside it.
# NOTE(review): the return statement is not visible; build() uses the result as the
# directory containing doc.tex.
222 def get_tex_dir(self, ctx):
223 ctx.workdir = mkdtemp('-wl2pdf')
224 texml = self.get_texml(ctx)
225 tex_path = os.path.join(ctx.workdir, 'doc.tex')
226 with open(tex_path, 'w') as fout:
227 # print etree.tostring(texml)
# Serialize the tree and feed it to the TeXML processor; 'utf-8' is presumably the
# encoding argument -- confirm against the Texml API.
228 process(StringIO(etree.tostring(texml)), fout, 'utf-8')
231 # shutil.copy(tex_path, self.save_tex)
233 # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
234 # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
# Run xelatex on the generated doc.tex and return the resulting PDF as an OutputFile.
237 def build(self, ctx=None, verbose=False):
238 temp = self.get_tex_dir(ctx)
239 tex_path = os.path.join(temp, 'doc.tex')
# Two variants of the xelatex run, each repeated self.tex_passes times: one with
# xelatex output left on the console, one in batch mode with output piped away.
# Presumably chosen by `verbose` -- the condition is not visible here.
247 for i in range(self.tex_passes):
248 p = call(['xelatex', tex_path])
250 for i in range(self.tex_passes):
251 p = call(['xelatex', '-interaction=batchmode', tex_path],
252 stdout=PIPE, stderr=PIPE)
# NOTE(review): the condition guarding this raise is not visible; `p` holds the exit
# status of the last xelatex pass.
254 # raise ParseError("Error parsing .tex file: %s" % tex_path)
255 raise RuntimeError("Error parsing .tex file: %s" % tex_path)
# delete=False keeps the temporary file on disk; doc.pdf is moved over its name.
260 output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
261 pdf_path = os.path.join(temp, 'doc.pdf')
262 shutil.move(pdf_path, output_file.name)
# NOTE(review): shells out only to list the output file; looks like leftover
# debugging output -- confirm whether it is still wanted.
264 os.system("ls -l " + output_file.name)
265 return OutputFile.from_filename(output_file.name)
# Look up the renderer registered for `element` and return what it renders.
267 def render(self, element, ctx):
268 return self.renderers.get_for(element).render(element, ctx)
# Renderer whose container holds one TeXML command named by self.tag_name.
# NOTE(review): the method definitions enclosing the statements below, and whatever
# they return, are not visible in this listing.
271 class CmdRenderer(TreeRenderer):
# Container element named by self.root_name.
276 root = etree.Element(self.root_name)
# The command receives self.parms() followed by one extra empty parameter;
# presumably that last parameter is the slot for rendered content -- confirm.
277 root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# Renderer whose container holds an `env` element named by self.tag_name.
# NOTE(review): the enclosing method definition and its return are not visible here.
282 class EnvRenderer(TreeRenderer):
284 root = etree.Element(self.root_name)
# NOTE(review): 'env' is created without the TexmlNS wrapper, unlike the document
# environment in PdfFormat.get_texml -- confirm this is intended.
285 inner = etree.SubElement(root, 'env', name=self.tag_name)
# Renderer whose container holds a `group` element with a command appended to it.
# NOTE(review): the enclosing method definition, any condition around the append,
# and the return are not visible in this listing.
289 class GroupRenderer(CmdRenderer):
291 root = etree.Element(self.root_name)
292 inner = etree.SubElement(root, 'group')
# Same command shape as CmdRenderer: self.parms() plus one extra empty parameter.
294 inner.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# CmdRenderer variant for sections: children get a context with a deeper toc_level,
# and the container starts with a `pagebreak` command before the section's command.
# NOTE(review): the definition enclosing the container statements and its return are
# not visible in this listing.
298 class SectionRenderer(CmdRenderer):
# Child context: toc_level is the current value (1 when unset) plus 2.
299 def subcontext(self, element, ctx):
301 return Context(ctx, toc_level=getattr(ctx, 'toc_level', 1) + 2)
304 root = etree.Element(self.root_name)
# `pagebreak` with the single option '1', emitted ahead of the section's own command.
305 root.append(texml_cmd('pagebreak', opts=['1']))
306 root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# Renderers for Section, Header and Div nodes registered with None as the
# second argument.
_register = PdfFormat.renderers.register
_register(core.Section, None, SectionRenderer('par'))
_register(core.Header, None, CmdRenderer('section*'))
_register(core.Div, None, CmdRenderer('par'))
del _register
# Renders an image element: stores the file referenced by its `src` in the work
# directory as 'f<nr>.png' and fills in the command's parameters.
# NOTE(review): lines are missing from this listing (between the class line and
# render, the `try:` line, the body of the except branch, and the return).
318 class ImgRenderer(CmdRenderer):
322 def render(self, element, ctx):
323 root = super(ImgRenderer, self).render(element, ctx)
324 url = element.get('src')
# Image number taken from the context (0 when unset).
# NOTE(review): no visible statement advances ctx.img -- confirm against the full file.
325 nr = getattr(ctx, 'img', 0)
# Fetch/convert the image through the format object carried by the context.
327 ctx.format.add_file(ctx, 'f%d.png' % nr, url, image=True)
# First parameter: the stored file name.
328 root[0][0].text = 'f%d.png' % nr
330 size = Image.open(ctx.format.get_file(ctx, 'f%d.png' % nr)).size
331 except IOError: # not an image
# Second and third parameters: a fixed 15cm and 15cm scaled by height/width,
# i.e. presumably width and height with the aspect ratio preserved.
334 root[0][1].text = '15cm'
335 root[0][2].text = '%fcm' % (15.0 * size[1] / size[0])
# Register ImgRenderer for core.Div under 'img'; 'insertimage' is presumably the
# TeX command name the renderer emits (its tag_name) -- confirm against TreeRenderer.
338 PdfFormat.renderers.register(core.Div, 'img', ImgRenderer('insertimage'))
class VideoRenderer(CmdRenderer):
    """Render a video element as a hyperlink to its YouTube watch page."""

    def render(self, element, ctx):
        """Return the rendered tree with the video link placed inside it.

        The watch URL is built from the element's ``videoid`` attribute and
        used as both the target and the visible text of an ``href`` command.
        """
        root = super(VideoRenderer, self).render(element, ctx)
        # NOTE(review): a missing ``videoid`` attribute produces a URL ending
        # in "v=None"; confirm whether such elements can occur.
        url = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid')
        link = texml_cmd('href', url, url)
        # Clear any text in the command's first child and put the link there.
        root[0][0].text = None
        root[0][0].append(link)
        # PdfFormat.get_texml extends the document with this return value.
        return root
# Renderers for named Div and Span variants, plus the Span default.
_register = PdfFormat.renderers.register
_register(core.Div, 'video', VideoRenderer('par'))
_register(core.Div, 'defined', CmdRenderer('textbf'))
_register(core.Div, 'item', CmdRenderer('item'))
_register(core.Span, 'item', CmdRenderer('item'))
_register(core.Div, 'list', EnvRenderer('itemize'))
_register(core.Div, 'list.enum', EnvRenderer('enumerate'))
_register(core.Span, None, TreeRenderer())
_register(core.Span, 'cite', CmdRenderer('emph'))
_register(core.Span, 'cite.code', CmdRenderer('texttt'))
_register(core.Span, 'emp', CmdRenderer('textbf'))
_register(core.Span, 'emph', CmdRenderer('emph'))
del _register
# Renders a URI span: the (possibly rewritten) address becomes the text of the
# command's first parameter.
# NOTE(review): lines are missing from this listing (between the class line and
# render, the statement that binds `src`, and the return).
367 class SpanUri(CmdRenderer):
371 def render(self, element, ctx):
372 root = super(SpanUri, self).render(element, ctx)
# 'file://' addresses are rewritten relative to ctx.files_path.
374 if src.startswith('file://'):
375 src = ctx.files_path + src[7:]
376 root[0][0].text = src
# Register SpanUri for core.Span under 'uri'; 'href' is presumably the TeX command
# name the renderer emits (its tag_name) -- confirm against TreeRenderer.
378 PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href'))
# Renders a link span: the element's `href` attribute (possibly rewritten) becomes
# the text of the command's first parameter.
# NOTE(review): lines are missing from this listing (between the class line and
# render, and the return).
381 class SpanLink(CmdRenderer):
385 def render(self, element, ctx):
386 root = super(SpanLink, self).render(element, ctx)
# A missing href attribute is treated as an empty string.
387 src = element.attrib.get('href', '')
# 'file://' addresses are rewritten relative to ctx.files_path.
388 if src.startswith('file://'):
389 src = ctx.files_path + src[7:]
390 root[0][0].text = src
# Link spans and asides; 'comment' asides are registered with the Silent
# renderer imported from the html format.
_register = PdfFormat.renderers.register
_register(core.Span, 'link', SpanLink('href'))
_register(core.Aside, None, TreeRenderer())
_register(core.Aside, 'editorial', CmdRenderer('editorialpage'))
_register(core.Aside, 'comment', Silent())
del _register