1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from __future__ import with_statement
13 from copy import deepcopy
17 from StringIO import StringIO
18 from tempfile import mkdtemp, NamedTemporaryFile
21 from copy import deepcopy
22 from subprocess import call, PIPE
23 from urllib2 import urlopen
25 from Texml.processor import process
26 from lxml import etree
27 from lxml.etree import XMLSyntaxError, XSLTApplyError
29 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
30 from librarian.dcparser import Person
31 from librarian.parser import WLDocument
32 from librarian import ParseError, DCNS, get_resource, IOFile, Format
33 from librarian import functions
34 from pdf import PDFFormat
40 def _wrap(*args, **kw):
41 value = f(*args, **kw)
43 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
45 if isinstance(value, list):
46 import pdb; pdb.set_trace()
47 if isinstance(value, tuple):
48 return prefix + value[0], value[1] + postfix
50 return prefix + value + postfix
55 def cmd(name, parms=None):
56 def wrap(self, element=None):
57 pre, post = tag_open_close('cmd', name=name)
61 e = etree.Element("parm")
63 pre += etree.tostring(e)
64 if element is not None:
66 post = "</parm>" + post
73 def mark_alien_characters(text):
74 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
78 class EduModule(Xmill):
79 def __init__(self, options=None):
80 super(EduModule, self).__init__(options)
81 self.activity_counter = 0
82 self.exercise_counter = 0
84 def swap_endlines(txt):
85 if self.options['strofa']:
86 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
88 self.register_text_filter(functions.substitute_entities)
89 self.register_text_filter(mark_alien_characters)
90 self.register_text_filter(swap_endlines)
92 def get_dc(self, element, dc_field, single=False):
93 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
98 def handle_rdf__RDF(self, _):
99 "skip metadata in generation"
103 def get_rightsinfo(self, element):
104 rights_lic = self.get_dc(element, 'rights.license', True)
105 return u'<cmd name="rightsinfostr">' + \
106 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
107 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
def get_authors(self, element):
    """Return all credited creators of the document as one string.

    Looks up the ``creator.expert``, ``creator.scenario`` and
    ``creator.textbook`` ``dc:`` fields through ``get_dc`` (in that order)
    and joins every value found with ``u', '``.
    """
    creator_fields = ('creator.expert', 'creator.scenario', 'creator.textbook')
    names = []
    for dc_field in creator_fields:
        names.extend(self.get_dc(element, dc_field))
    return u', '.join(names)
def get_title(self, element):
    """Return the document title: ``get_dc`` for ``title`` with ``single=True``."""
    title = self.get_dc(element, 'title', True)
    return title
121 def handle_utwor(self, element):
124 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
126 \\documentclass[%s]{wl}
127 \\usepackage{style}''' % self.options['customization_str'],
128 self.options['has_cover'] and '\usepackage{makecover}',
129 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
130 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
131 (self.options['morefloats'] == 'none' and
132 u'''\\IfFileExists{morefloats.sty}{
133 \\usepackage{morefloats}
135 u'''\\def\\authors{%s}''' % self.get_authors(element),
136 u'''\\author{\\authors}''',
137 u'''\\title{%s}''' % self.get_title(element),
138 u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
139 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
142 return u"".join(filter(None, lines)), u'</TeXML>'
146 def handle_powiesc(self, element):
148 <env name="document">
149 <cmd name="maketitle"/>
def handle_texcommand(self, element):
    """Wrap an element in the TeX command that corresponds to its tag.

    The command name is whatever ``functions.texcommand`` returns for
    ``element.tag``. Returns a ``(prefix, suffix)`` pair of TeXML markup:
    the prefix opens an escaping ``<TeXML>`` wrapper, the ``<cmd>`` and its
    ``<parm>``; the suffix closes all three.
    """
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
161 handle_akap_dialog = \
162 handle_akap_dialog = \
163 handle_autor_utworu = \
165 handle_didaskalia = \
166 handle_didask_tekst = \
167 handle_dlugi_cytat = \
168 handle_dzielo_nadrzedne = \
169 handle_lista_osoba = \
171 handle_miejsce_czas = \
173 handle_motto_podpis = \
174 handle_naglowek_akt = \
175 handle_naglowek_czesc = \
176 handle_naglowek_listy = \
177 handle_naglowek_osoba = \
178 handle_naglowek_podrozdzial = \
179 handle_naglowek_podrozdzial = \
180 handle_naglowek_rozdzial = \
181 handle_naglowek_rozdzial = \
182 handle_naglowek_scena = \
183 handle_nazwa_utworu = \
189 handle_poezja_cyt = \
192 handle_sekcja_asterysk = \
193 handle_sekcja_swiatlo = \
194 handle_separator_linia = \
195 handle_slowo_obce = \
197 handle_tytul_dziela = \
198 handle_wyroznienie = \
201 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Handle a ``<strofa>`` element.

    Assigns ``{'strofa': True}`` to ``self.options`` and then delegates to
    ``_handle_strofa`` (the ``cmd("strofa")`` handler), returning its result
    unchanged.
    """
    # NOTE(review): the flag is what swap_endlines() in __init__ tests before
    # rewriting "/\n" line ends; how the options setter scopes this
    # assignment is not visible here -- confirm in Xmill.
    self.options = {'strofa': True}
    rendered = self._handle_strofa(element)
    return rendered
207 def handle_aktywnosc(self, element):
208 self.activity_counter += 1
211 'activity_counter': self.activity_counter,
214 submill = EduModule(self.options)
216 opis = submill.generate(element.xpath('opis')[0])
218 n = element.xpath('wskazowki')
219 if n: wskazowki = submill.generate(n[0])
222 n = element.xpath('pomoce')
224 if n: pomoce = submill.generate(n[0])
227 forma = ''.join(element.xpath('forma/text()'))
229 czas = ''.join(element.xpath('czas/text()'))
231 counter = self.activity_counter
235 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
236 <cmd name="activityinfo"><parm>
237 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
238 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
239 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
248 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
249 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Prefix the element's content with the label ``"Pomoce: "``.

    Returns a ``(prefix, suffix)`` pair with an empty suffix.
    """
    # NOTE(review): presumably the ifoption decorator enables this handler
    # only when the sub_gen option is set -- confirm in xmlutils.
    label = "Pomoce: "
    suffix = ""
    return label, suffix
255 def handle_czas(self, *_):
258 def handle_forma(self, *_):
261 def handle_lista(self, element, attrs={}):
262 if not element.findall("punkt"):
264 ltype = element.attrib.get('typ', 'punkt')
265 if ltype == 'slowniczek':
266 surl = element.attrib.get('href', None)
269 sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
270 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
272 listcmd = {'num': 'enumerate',
275 'slowniczek': 'itemize',
276 'czytelnia': 'itemize'}[ltype]
278 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Start a list item.

    Returns a ``(prefix, suffix)`` pair: an ``item`` command before the
    element's content and nothing after it.
    """
    item_cmd = '<cmd name="item"/>'
    return item_cmd, ''
283 def handle_cwiczenie(self, element):
284 exercise_handlers = {
286 'uporzadkuj': Uporzadkuj,
289 'przyporzadkuj': Przyporzadkuj,
290 'prawdafalsz': PrawdaFalsz
293 typ = element.attrib['typ']
294 self.exercise_counter += 1
295 if not typ in exercise_handlers:
296 return '(no handler)'
297 self.options = {'exercise_counter': self.exercise_counter}
298 handler = exercise_handlers[typ](self.options)
299 return handler.generate(element)
301 # XXX this is copied from pyhtml.py, except for return and
302 # should be refactored for no code duplication
303 def handle_definiendum(self, element):
304 nxt = element.getnext()
307 # let's pull definiens from another document
308 if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'):
309 sxml = self.options['slowniczek_xml']
310 assert element.text != ''
311 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
313 definiens = defloc[0].getnext()
314 if definiens.tag == 'definiens':
315 subgen = EduModule(self.options)
316 definiens_s = subgen.generate(definiens)
318 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
320 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a ``figure`` environment.

    Returns the opening and closing ``<env>`` markup as a
    ``(prefix, suffix)`` pair.
    """
    env_open = u'<env name="figure">'
    env_close = u'</env>'
    return env_open, env_close
326 def handle_tabela(self, element):
328 for w in element.xpath("wiersz"):
330 if max_col < len(ks):
332 self.options = {'columnts': max_col}
334 # has_frames = int(element.attrib.get("ramki", "0"))
335 # if has_frames: frames_c = "framed"
336 # else: frames_c = ""
337 # return u"""<table class="%s">""" % frames_c, u"</table>"
339 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
340 ''' % ('l' * max_col), \
341 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Terminate a table row.

    Returns a ``(prefix, suffix)`` pair: nothing before the row and a
    ``<ctrl>`` element for the backslash character after it.
    """
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
348 def handle_kol(self, element):
349 if element.getnext() is not None:
350 return u"", u'<spec cat="align" />'
353 def handle_link(self, element):
354 if element.attrib.get('url'):
355 url = element.attrib.get('url')
356 if url == element.text:
357 return cmd('url')(self, element)
359 return cmd('href', parms=[element.attrib['url']])(self, element)
361 return cmd('em')(self, element)
def handle_obraz(self, element):
    """Attach the image named by the element and emit an ``obraz`` command.

    The ``nazwa`` attribute, stripped of surrounding whitespace, is looked
    up through the ``get_image`` method of the object stored in the
    ``format`` option. The result is stored in that object's
    ``attachments`` under ``obraz/<name with underscores removed>``, and the
    same path becomes the only parameter of the ``obraz`` command.
    """
    pdf_format = self.options['format']
    image_name = element.attrib['nazwa'].strip()
    target_path = "obraz/%s" % image_name.replace("_", "")
    pdf_format.attachments[target_path] = pdf_format.get_image(image_name)
    return cmd("obraz", parms=[target_path])(self)
371 def handle_video(self, element):
372 url = element.attrib.get('url')
374 print '!! <video> missing url'
376 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
378 print '!! unknown <video> url scheme:', url
381 thumb = IOFile.from_string(urlopen
382 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
383 img_path = "video/%s.jpg" % name.replace("_", "")
384 self.options['format'].attachments[img_path] = thumb
385 canon_url = "https://www.youtube.com/watch?v=%s" % name
386 return cmd("video", parms=[img_path, canon_url])(self)
389 class Exercise(EduModule):
390 def __init__(self, *args, **kw):
391 self.question_counter = 0
392 super(Exercise, self).__init__(*args, **kw)
394 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
396 def handle_cwiczenie(self, element):
398 'exercise': element.attrib['typ'],
401 self.question_counter = 0
402 self.piece_counter = 0
404 header = etree.Element("parm")
405 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
406 header_cmd.append(header)
407 header.text = u"Zadanie %d." % self.options['exercise_counter']
409 pre = etree.tostring(header_cmd, encoding=unicode)
411 # Add a single <pytanie> tag if it's not there
412 if not element.xpath(".//pytanie"):
413 qpre, qpost = self.handle_pytanie(element)
418 def handle_pytanie(self, element):
419 """This will handle <cwiczenie> element, when there is no <pytanie>
421 self.question_counter += 1
422 self.piece_counter = 0
424 if self.options['teacher'] and element.attrib.get('rozw'):
425 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
428 def handle_punkt(self, element):
429 pre, post = super(Exercise, self).handle_punkt(element)
430 if self.options['teacher'] and element.attrib.get('rozw'):
431 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
434 def solution_header(self):
435 par = etree.Element("cmd", name="par")
436 parm = etree.Element("parm")
437 parm.text = u"Rozwiązanie:"
439 return etree.tostring(par)
441 def explicit_solution(self):
442 if self.options['solution']:
443 par = etree.Element("cmd", name="par")
444 parm = etree.Element("parm")
445 parm.text = self.options['solution']
447 return self.solution_header() + etree.tostring(par)
451 class Wybor(Exercise):
452 def handle_cwiczenie(self, element):
453 pre, post = super(Wybor, self).handle_cwiczenie(element)
454 is_single_choice = True
455 pytania = element.xpath(".//pytanie")
459 solutions = re.split(r"[, ]+", p.attrib['rozw'])
460 if len(solutions) != 1:
461 is_single_choice = False
463 choices = p.xpath(".//*[@nazwa]")
465 for n in choices: uniq.add(n.attrib['nazwa'])
466 if len(choices) != len(uniq):
467 is_single_choice = False
470 self.options = {'single': is_single_choice}
473 def handle_punkt(self, element):
474 if self.options['exercise'] and element.attrib.get('nazwa', None):
475 cmd = 'radio' if self.options['single'] else 'checkbox'
476 return u'<cmd name="%s"/>' % cmd, ''
478 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type.

    Adds nothing to the generic question handling of ``Exercise``.
    """

    def handle_pytanie(self, element):
        """Render a question by delegating to ``Exercise.handle_pytanie``.

        Returns whatever the parent implementation returns for ``element``.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
487 class Przyporzadkuj(Exercise):
488 def handle_lista(self, lista):
489 header = etree.Element("parm")
490 header_cmd = etree.Element("cmd", name="par")
491 header_cmd.append(header)
492 if 'nazwa' in lista.attrib:
493 header.text = u"Kategorie:"
494 elif 'cel' in lista.attrib:
495 header.text = u"Elementy do przyporządkowania:"
497 header.text = u"Lista:"
498 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
499 pre = etree.tostring(header_cmd, encoding=unicode) + pre
503 class Luki(Exercise):
def find_pieces(self, question):
    """Return the result of the ``.//luka`` XPath query on ``question``."""
    gaps = question.xpath(".//luka")
    return gaps
507 def solution(self, piece):
508 piece = deepcopy(piece)
511 return sub.generate(piece)
513 def handle_pytanie(self, element):
514 qpre, qpost = super(Luki, self).handle_pytanie(element)
516 luki = self.find_pieces(element)
518 self.words = u"<env name='itemize'>%s</env>" % (
519 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
523 def handle_opis(self, element):
524 return '', self.words
526 def handle_luka(self, element):
528 if self.options['teacher']:
529 piece = deepcopy(element)
532 text = sub.generate(piece)
533 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return the result of the ``.//zastap`` XPath query on ``question``."""
    replacements = question.xpath(".//zastap")
    return replacements
def solution(self, piece):
    """Return the value of the piece's ``rozw`` attribute.

    Raises ``KeyError`` when the attribute is absent.
    """
    answer = piece.attrib['rozw']
    return answer
def list_header(self):
    """Return the fixed heading text ``u"Elementy do wstawienia"``."""
    heading = u"Elementy do wstawienia"
    return heading
547 def handle_zastap(self, element):
548 piece = deepcopy(element)
551 text = sub.generate(piece)
552 if self.options['teacher'] and element.attrib.get('rozw'):
553 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
557 class PrawdaFalsz(Exercise):
558 def handle_punkt(self, element):
559 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
560 if 'rozw' in element.attrib:
561 post += u" [Prawda/Fałsz]"
567 lists = tree.xpath(".//lista")
572 if p.tail is None: p.tail = ''
576 if p.text is None: p.text = ''
582 class EduModulePDFFormat(PDFFormat):
584 self.attachments = {}
587 "teacher": self.customization.get('teacher'),
589 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
591 open("/tmp/texml.xml", "w").write(texml)
594 def get_tex_dir(self):
595 temp = super(EduModulePDFFormat, self).get_tex_dir()
596 for name, iofile in self.attachments.items():
597 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the attachment stored under ``name`` on the document source.

    Reads ``self.wldoc.source.attachments``; a missing name propagates the
    mapping's lookup error.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]