1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
8 from subprocess import call, PIPE
9 from tempfile import NamedTemporaryFile, mkdtemp
10 from lxml import etree
11 from urllib import urlretrieve
12 from StringIO import StringIO
13 from Texml.processor import process
14 from librarian import DCNS, XMLNamespace, BuildError
15 from librarian.formats import Format
16 from librarian.output import OutputFile
17 from librarian.renderers import Register, TreeRenderer
18 from librarian.utils import Context, get_resource
19 from librarian import core
21 from ..html import Silent
# TeXML namespace helper: TexmlNS('cmd') is what gets passed to lxml as the
# tag of a TeXML element (see texml_cmd).
TexmlNS = XMLNamespace('http://getfo.sourceforge.net/texml/ns1')
def texml_cmd(name, *parms, **kwargs):
    """Build a TeXML ``cmd`` element.

    ``name`` is stored in the element's ``name`` attribute.  Each item of
    the optional ``opts`` keyword argument becomes an ``opt`` child and each
    positional ``parms`` item becomes a ``parm`` child, with the given
    string as the child's text.

    Returns the ``cmd`` element.
    """
    cmd = etree.Element(TexmlNS('cmd'), name=name)
    for opt in kwargs.get('opts', []):
        etree.SubElement(cmd, TexmlNS('opt')).text = opt
    # Parameters follow the options, so when no options are given cmd[0] is
    # the first parameter -- callers rely on that to fill in content.
    for parm in parms:
        etree.SubElement(cmd, TexmlNS('parm')).text = parm
    return cmd
# PDF output format: builds a TeXML tree for the document (get_texml),
# converts it to a .tex file (get_tex_dir) and runs xelatex on it (build).
36 class PdfFormat(Format):
# Default style file; get_texml copies it into the work directory as
# 'style.sty'.
40 style = get_resource('formats/pdf/res/default.sty')
# NOTE(review): the two entries below look like items of the
# `local_packages` sequence that get_texml iterates over; the lines opening
# and closing that sequence are not visible here -- confirm.
43 get_resource('formats/pdf/res/coverimage.sty'),
44 get_resource('formats/pdf/res/insertimage.sty'),
# Renderer registry for this format; filled in by the
# PdfFormat.renderers.register(...) calls further down in this file and
# queried by render().
47 renderers = Register()
# Hook for fetching `url` into the file `save_as`.
# NOTE(review): the body of this method is not visible here.  add_file only
# runs its own download logic when this call returns a falsy value.
49 def retrieve_file(self, url, save_as):
# Place a file in the build work directory under the name `filename`.
#
# The file comes either from a local `path` or from `url`; at least one of
# them must be given, otherwise BuildError is raised.  `image` marks files
# that go through ImageMagick's `convert` before use.
# NOTE(review): several `if`/`else` lines of this method are not visible
# here, so the exact conditions selecting each branch below must be checked
# against the complete source.
53 def add_file(self, ctx, filename, url=None, path=None, image=False):
# NOTE(review): `call` is also imported at module level, which makes this
# local import redundant.
54 from subprocess import call
55 if not url and not path:
56 raise BuildError('No URL or path for image')
# Destination of the file inside the work directory.
57 save_as = os.path.join(ctx.workdir, filename)
# --- local file (`path`) ---
# Text after the last dot; if the path contains no dot this is the whole
# path.
59 ext = path.rsplit('.', 1)[-1]
# Plain conversion of the source file into the destination.
62 call(['convert', path, save_as])
64 # JPEGs with bad density will break LaTeX with 'Dimension too large'.
# Re-encode at 300 pixels per inch into a temporary sibling file, then move
# the result over the destination.  The exit status of `convert` is not
# checked on this path.
65 call(['convert', '-units', 'PixelsPerInch', path, '-density', '300', save_as + '_.' + ext])
66 shutil.move(save_as + '_.' + ext, save_as)
# Copy the file unchanged.
68 shutil.copy(path, save_as)
# --- remote file (`url`), only when retrieve_file did not handle it ---
69 elif not self.retrieve_file(url, save_as):
# 'file://' URLs are resolved against ctx.files_path (7 == len('file://')).
70 if url.startswith('file://'):
71 url = ctx.files_path + url[7:]
# URLs starting with '/' get a hardcoded host prepended.
73 if url.startswith('/'):
74 url = 'http://milpeer.eu' + url
76 ext = url.rsplit('.', 1)[-1]
# Download to a temporary sibling file so it can be converted afterwards.
78 urlretrieve(url, save_as + '_.' + ext)
80 call(['convert', save_as + '_.' + ext, save_as])
82 # JPEGs with bad density will break LaTeX with 'Dimension too large'.
# `r` is the exit status of `convert`.
83 r = call(['convert', '-units', 'PixelsPerInch', save_as + '_.' + ext, '-density', '300',
84 save_as + '_2.' + ext])
# Either the downloaded original ('_.') or the re-encoded copy ('_2.')
# becomes the final file -- presumably chosen by `r`; confirm.
86 shutil.move(save_as + '_.' + ext, save_as)
88 shutil.move(save_as + '_2.' + ext, save_as)
# Download straight to the destination, with no conversion.
90 urlretrieve(url, save_as)
def get_file(self, ctx, filename):
    """Return the path of ``filename`` inside the build work directory.

    The directory is read from ``ctx.workdir``.  The path is only computed;
    the file is not checked for existence.
    """
    return os.path.join(ctx.workdir, filename)
# Build the TeXML tree for the whole document: preamble, cover, title page,
# rendered content and a closing information section.
#
# `build_ctx` is the build context; its `workdir` receives the auxiliary
# files through add_file, and its optional `cover_logo` and `source_url`
# attributes are read below.
# NOTE(review): a number of lines of this method (guards such as `if ...:`,
# some `doc.append(...)` calls and the final return) are not visible here;
# the comments describe the visible statements only.
95 def get_texml(self, build_ctx):
96 t = etree.Element(TexmlNS('TeXML'))
# --- preamble: document class, style and helper packages ---
98 self.add_file(build_ctx, 'wl.cls', path=get_resource('formats/pdf/res/wl.cls'))
99 t.append(texml_cmd("documentclass", "wl"))
102 self.add_file(build_ctx, 'style.sty', path=self.style)
103 t.append(texml_cmd("usepackage", "style"))
104 t.append(texml_cmd("usepackage", "hyphenat"))
# Local packages are copied under generated, numbered names and loaded in
# order.
107 for i, package in enumerate(self.local_packages):
108 self.add_file(build_ctx, "librarianlocalpackage%s.sty" % i, path=package)
109 t.append(texml_cmd("usepackage", "librarianlocalpackage%s" % i))
# All creators joined with ', '; an empty string when there are none.
111 author = ", ". join(self.doc.meta.get(DCNS('creator')) or '')
112 title = self.doc.meta.title()
113 t.append(texml_cmd("author", author))
114 t.append(texml_cmd("title", title))
# --- document body ---
116 doc = etree.SubElement(t, TexmlNS('env'), name="document")
117 doc.append(texml_cmd("thispagestyle", "empty"))
# --- cover image ---
# NOTE(review): presumably guarded by a check that cover_url is set --
# the guard is not visible here.
121 cover_url = self.doc.meta.get_one(DCNS('relation.coverimage.url'))
123 self.add_file(build_ctx, 'cover.png', cover_url, image=True)
125 img = Image.open(self.get_file(build_ctx, 'cover.png'))
# NOTE(review): the assignment of `size` is not visible here; presumably
# it is the image's (width, height) -- confirm.
# An image taller than it is wide is cropped to a square taken from its
# top edge and saved back in place.
128 if size[1] > size[0]:
129 img = img.crop((0, 0, size[0], size[0]))
130 img.save(self.get_file(build_ctx, 'cover.png'), format=img.format, quality=90)
133 # TODO: hardcoded paper size here
# Cover height for a 210-unit-wide page, keeping the aspect ratio; the
# value is emitted in millimetres.
134 height = 210.0 * size[1] / size[0]
135 doc.append(texml_cmd("makecover", "%fmm" % height))
137 doc.append(texml_cmd("vfill*"))
# --- title block: author and title, ragged-right ---
# NOTE(review): this 'group' tag is not namespace-qualified, unlike the
# TexmlNS(...) tags used above -- confirm this is intended.
140 grp = etree.SubElement(doc, 'group')
141 grp.append(texml_cmd("raggedright"))
142 grp.append(texml_cmd("vfill"))
# p[0] is the single (empty) parameter of \par; the content is appended
# into it.
144 p = texml_cmd("par", "")
146 p[0].append(texml_cmd("Large"))
147 p[0].append(texml_cmd("noindent"))
148 p[0].append(texml_cmd("nohyphens", author))
149 p[0].append(texml_cmd("vspace", "1em"))
150 # p[0][-1].tail = author
152 p = texml_cmd("par", "")
154 p[0].append(texml_cmd("Huge"))
155 p[0].append(texml_cmd("noindent"))
156 p[0].append(texml_cmd("nohyphens", title))
157 # p[0][-1].tail = title
158 doc.append(texml_cmd("vfill"))
159 doc.append(texml_cmd("vfill"))
# --- optional cover logo, taken from the build context ---
161 # IOFile probably would be better
162 cover_logo_url = getattr(build_ctx, 'cover_logo', None)
165 # cover_logo_url = 'http://milpeer.mdrn.pl/media/dynamic/people/logo/nowoczesnapolska.org.pl.png'
167 self.add_file(build_ctx, 'coverlogo.png', cover_logo_url, image=True)
168 size = Image.open(self.get_file(build_ctx, 'coverlogo.png')).size
169 p = texml_cmd("par", "")
171 p[0].append(texml_cmd("noindent"))
# The second dimension is fixed at 1cm and the first is scaled by
# size[0] / size[1] to keep the logo's proportions.
172 p[0].append(texml_cmd("insertimage", 'coverlogo.png', "%fcm" % (1.0 * size[0] / size[1]), "1cm"))
175 doc.append(texml_cmd("clearpage"))
# --- main content: render the document tree with this format ---
# `img=1` seeds the `img` attribute that ImgRenderer reads from the
# context.
177 ctx = Context(build_ctx, format=self, img=1)
178 doc.extend(self.render(self.doc.edoc.getroot(), ctx))
180 # Editorial information goes at the end.
181 doc.append(texml_cmd("clearpage"))
183 doc.append(texml_cmd("section*", "Information about the resource"))
184 doc.append(texml_cmd("vspace", "1em"))
# (label, metadata field) pairs; the loop header binding them to `m` and
# `f` is not visible here.
187 ('Publisher: ', DCNS('publisher')),
188 ('Rights: ', DCNS('rights')),
189 ('Intended audience: ', DCNS('audience')),
190 ('', DCNS('description'))):
191 v = self.doc.meta.get_one(f)
# One unindented paragraph per field: the label followed by the value,
# placed as tail text after \noindent.
193 e = texml_cmd("par", "")
194 e[0].append(texml_cmd("noindent"))
195 e[0][0].tail = "%s%s" % (m, v)
197 doc.append(texml_cmd("vspace", "1em"))
# Credit line with a link to the editing platform.
199 e = texml_cmd("par", "")
200 e[0].append(texml_cmd("noindent"))
201 e[0][0].tail = "Resource prepared using "
202 e[0].append(texml_cmd("href", "http://milpeer.eu", "MIL/PEER"))
203 e[0][-1].tail = " editing platform. "
# Optional link back to the source, taken from the build context.
206 source_url = getattr(build_ctx, 'source_url', None)
207 # source_url = 'http://milpeer.mdrn.pl/documents/27/'
209 e = texml_cmd("par", "")
211 e[0].append(texml_cmd("noindent"))
212 e[0][0].tail = "Source available at "
213 e[0].append(texml_cmd("href", source_url, source_url))
# Create a fresh work directory, build the TeXML tree and convert it into
# 'doc.tex' inside that directory.
217 def get_tex_dir(self, ctx):
# New temporary directory whose name ends with '-wl2pdf'.  It is stored on
# the context; add_file and get_file resolve file names against it.
218 ctx.workdir = mkdtemp('-wl2pdf')
219 texml = self.get_texml(ctx)
220 tex_path = os.path.join(ctx.workdir, 'doc.tex')
221 with open(tex_path, 'w') as fout:
222 # print etree.tostring(texml)
# Serialize the tree and hand it to the Texml processor, which writes
# to `fout`; 'utf-8' is passed as its encoding argument.
223 process(StringIO(etree.tostring(texml)), fout, 'utf-8')
226 # shutil.copy(tex_path, self.save_tex)
228 # for sfile in ['wasysym.sty', 'uwasyvar.fd', 'uwasy.fd']:
229 # shutil.copy(get_resource(os.path.join('res/wasysym', sfile)), temp)
# NOTE(review): no return statement is visible here, yet build() uses the
# result of this method as the directory that contains 'doc.tex' --
# confirm that ctx.workdir is returned.
# Build the PDF: generate the TeX sources, run xelatex `self.tex_passes`
# times and return the resulting file wrapped in an OutputFile.
#
# NOTE(review): `ctx` defaults to None, but get_tex_dir assigns
# `ctx.workdir`, so calling build() without a context fails there.
# NOTE(review): the lines choosing between the two xelatex loops
# (presumably on `verbose`) and the condition guarding the RuntimeError
# (presumably a non-zero exit status in `p`) are not visible here.
232 def build(self, ctx=None, verbose=False):
233 temp = self.get_tex_dir(ctx)
234 tex_path = os.path.join(temp, 'doc.tex')
# Interactive run: xelatex output goes to the console.
242 for i in range(self.tex_passes):
243 p = call(['xelatex', tex_path])
# Batch-mode run with output captured into pipes.
# NOTE(review): the subprocess documentation advises against
# stdout=PIPE / stderr=PIPE with call(): nothing reads the pipes, so the
# child can block once a pipe buffer fills up.
245 for i in range(self.tex_passes):
246 p = call(['xelatex', '-interaction=batchmode', tex_path],
247 stdout=PIPE, stderr=PIPE)
249 # raise ParseError("Error parsing .tex file: %s" % tex_path)
250 raise RuntimeError("Error parsing .tex file: %s" % tex_path)
# Move the PDF out of the work directory into a named temporary file that
# is not deleted on close (delete=False).
255 output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
256 pdf_path = os.path.join(temp, 'doc.pdf')
257 shutil.move(pdf_path, output_file.name)
# NOTE(review): looks like leftover debugging output; it builds a shell
# command by string concatenation.
259 os.system("ls -l " + output_file.name)
260 return OutputFile.from_filename(output_file.name)
def render(self, element, ctx):
    """Render ``element`` with the renderer registered for it.

    The renderer is looked up in ``self.renderers``; the result of its
    ``render(element, ctx)`` call is returned unchanged.
    """
    renderer = self.renderers.get_for(element)
    return renderer.render(element, ctx)
# Renderer that emits a single TeXML command named `self.tag_name`.
266 class CmdRenderer(TreeRenderer):
# NOTE(review): the `def` lines and return statements of this class are
# not visible here.  The statements below create a `self.root_name`
# wrapper element and append one command to it; the command's parameters
# are `self.parms()` followed by one empty parameter, which is presumably
# where the rendered child content goes -- confirm.
271 root = etree.Element(self.root_name)
272 root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# Renderer that emits a TeXML environment named `self.tag_name`.
277 class EnvRenderer(TreeRenderer):
# NOTE(review): the enclosing `def` line and the return statement are not
# visible here.  The 'env' element is created inside a `self.root_name`
# wrapper element.
279 root = etree.Element(self.root_name)
280 inner = etree.SubElement(root, 'env', name=self.tag_name)
# Like CmdRenderer, but the command is emitted inside a TeXML 'group'
# element.
284 class GroupRenderer(CmdRenderer):
# NOTE(review): the enclosing `def` line, the line between the two
# statements on `inner`, and the return statement are not visible here.
286 root = etree.Element(self.root_name)
287 inner = etree.SubElement(root, 'group')
# Same command construction as in CmdRenderer: `self.parms()` plus one
# trailing empty parameter.
289 inner.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# Command renderer for sections: emits a page-break command before the
# section's own command and hands nested content a deeper `toc_level`.
293 class SectionRenderer(CmdRenderer):
# Context for nested content: `toc_level` is raised by 2, starting from 1
# when the parent context does not define it.
294 def subcontext(self, element, ctx):
296 return Context(ctx, toc_level=getattr(ctx, 'toc_level', 1) + 2)
# NOTE(review): the `def` line of the method holding the statements below
# and its return statement are not visible here.
299 root = etree.Element(self.root_name)
# \pagebreak with the option '1', followed by the section's own command.
300 root.append(texml_cmd('pagebreak', opts=['1']))
301 root.append(texml_cmd(self.tag_name, *(self.parms() + [""])))
# Block-level elements: sections and plain divs are rendered as \par,
# headers as \section*.
PdfFormat.renderers.register(core.Section, None, SectionRenderer('par'))
PdfFormat.renderers.register(core.Header, None, CmdRenderer('section*'))
PdfFormat.renderers.register(core.Div, None, CmdRenderer('par'))
# Renders an image element: stores the image in the work directory and
# fills in the file name and dimensions of the rendered command.
313 class ImgRenderer(CmdRenderer):
# NOTE(review): some lines of this class are not visible here (the lines
# before `render`, the `try:` line, the body of the `except` clause and
# the return statement).
317 def render(self, element, ctx):
318 root = super(ImgRenderer, self).render(element, ctx)
319 url = element.get('src')
# Image number read from the context (0 when unset); it determines the
# file name 'f<nr>.png' used below.
320 nr = getattr(ctx, 'img', 0)
322 ctx.format.add_file(ctx, 'f%d.png' % nr, url, image=True)
# First parameter of the command: the file name.
323 root[0][0].text = 'f%d.png' % nr
325 size = Image.open(ctx.format.get_file(ctx, 'f%d.png' % nr)).size
326 except IOError: # not an image
# Second and third parameters: 15cm for the first dimension, and the
# other scaled by size[1] / size[0] to keep the proportions.
329 root[0][1].text = '15cm'
330 root[0][2].text = '%fcm' % (15.0 * size[1] / size[0])
# Divs of class 'img' are rendered through the insertimage command.
PdfFormat.renderers.register(core.Div, 'img', ImgRenderer('insertimage'))
class VideoRenderer(CmdRenderer):
    """Render a video element as a link to its YouTube watch page."""

    def render(self, element, ctx):
        """Return the rendered command with a link to the video inside it.

        The URL is built from the element's ``videoid`` attribute and is
        used both as the link target and as the link text.
        """
        root = super(VideoRenderer, self).render(element, ctx)
        url = 'https://www.youtube.com/watch?v=%s' % element.attrib.get('videoid')
        link = texml_cmd('href', url, url)
        # root[0][0] is the first child of the rendered command: clear its
        # text and put the link there instead.
        root[0][0].text = None
        root[0][0].append(link)
        # The result is consumed by the caller (PdfFormat.render), so the
        # modified tree has to be handed back.
        return root
# Divs by class: videos, definitions, list items and lists.
PdfFormat.renderers.register(core.Div, 'video', VideoRenderer('par'))
PdfFormat.renderers.register(core.Div, 'defined', CmdRenderer('textbf'))
PdfFormat.renderers.register(core.Div, 'item', CmdRenderer('item'))
PdfFormat.renderers.register(core.Div, 'list', EnvRenderer('itemize'))
PdfFormat.renderers.register(core.Div, 'list.enum', EnvRenderer('enumerate'))

# Inline spans: plain spans use the base TreeRenderer, the classes below
# map to emphasis, bold and typewriter commands.
PdfFormat.renderers.register(core.Span, None, TreeRenderer())
PdfFormat.renderers.register(core.Span, 'cite', CmdRenderer('emph'))
PdfFormat.renderers.register(core.Span, 'cite.code', CmdRenderer('texttt'))
PdfFormat.renderers.register(core.Span, 'emp', CmdRenderer('textbf'))
PdfFormat.renderers.register(core.Span, 'emph', CmdRenderer('emph'))
# Renders a URI span; it is registered below with the 'href' command.
361 class SpanUri(CmdRenderer):
# NOTE(review): some lines of this class are not visible here (the lines
# before `render`, the assignment of `src` and the return statement).
365 def render(self, element, ctx):
366 root = super(SpanUri, self).render(element, ctx)
# 'file://' addresses are resolved against ctx.files_path
# (7 == len('file://')).
368 if src.startswith('file://'):
369 src = ctx.files_path + src[7:]
# The resulting address becomes the text of the command's first child.
370 root[0][0].text = src
# Spans of class 'uri' are rendered as \href through SpanUri.
PdfFormat.renderers.register(core.Span, 'uri', SpanUri('href'))
# Renders a link span; it is registered below with the 'href' command.
375 class SpanLink(CmdRenderer):
# NOTE(review): the lines before `render` and the return statement are not
# visible here.
379 def render(self, element, ctx):
380 root = super(SpanLink, self).render(element, ctx)
# Link target from the 'href' attribute; empty string when it is missing.
381 src = element.attrib.get('href', '')
# 'file://' addresses are resolved against ctx.files_path
# (7 == len('file://')).
382 if src.startswith('file://'):
383 src = ctx.files_path + src[7:]
# The target becomes the text of the command's first child.
384 root[0][0].text = src
# Spans of class 'link' are rendered as \href through SpanLink.
PdfFormat.renderers.register(core.Span, 'link', SpanLink('href'))

# Asides: plain asides use the base TreeRenderer, editorial asides use the
# editorialpage command, and comments go to the Silent renderer.
PdfFormat.renderers.register(core.Aside, None, TreeRenderer())
PdfFormat.renderers.register(core.Aside, 'editorial', CmdRenderer('editorialpage'))
PdfFormat.renderers.register(core.Aside, 'comment', Silent())