1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from __future__ import with_statement
13 from copy import deepcopy
17 from StringIO import StringIO
18 from tempfile import mkdtemp, NamedTemporaryFile
21 from copy import deepcopy
22 from subprocess import call, PIPE
23 from urllib2 import urlopen
25 from Texml.processor import process
26 from lxml import etree
27 from lxml.etree import XMLSyntaxError, XSLTApplyError
29 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
30 from librarian.dcparser import Person
31 from librarian.parser import WLDocument
32 from librarian import ParseError, DCNS, get_resource, IOFile, Format
33 from librarian import functions
34 from pdf import PDFFormat
40 def _wrap(*args, **kw):
41 value = f(*args, **kw)
43 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
45 if isinstance(value, list):
46 import pdb; pdb.set_trace()
47 if isinstance(value, tuple):
48 return prefix + value[0], value[1] + postfix
50 return prefix + value + postfix
55 def cmd(name, parms=None):
56 def wrap(self, element=None):
57 pre, post = tag_open_close('cmd', name=name)
61 e = etree.Element("parm")
63 pre += etree.tostring(e)
64 if element is not None:
66 post = "</parm>" + post
73 def mark_alien_characters(text):
74 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
78 class EduModule(Xmill):
79 def __init__(self, options=None):
80 super(EduModule, self).__init__(options)
81 self.activity_counter = 0
82 self.exercise_counter = 0
84 def swap_endlines(txt):
85 if self.options['strofa']:
86 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
88 self.register_text_filter(functions.substitute_entities)
89 self.register_text_filter(mark_alien_characters)
90 self.register_text_filter(swap_endlines)
92 def get_dc(self, element, dc_field, single=False):
93 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
98 def handle_rdf__RDF(self, _):
99 "skip metadata in generation"
103 def get_rightsinfo(self, element):
104 rights_lic = self.get_dc(element, 'rights.license', True)
105 return u'<cmd name="rightsinfostr">' + \
106 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
107 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
def get_authors(self, element):
    """Return the document's authors as one comma-separated string.

    Collects the values of the ``creator.expert``, ``creator.scenario``
    and ``creator.textbook`` metadata fields (in that order) through
    ``get_dc`` and joins them with ``", "``.
    """
    collected = []
    for role in ('creator.expert', 'creator.scenario', 'creator.textbook'):
        # Plain list concatenation, so get_dc must hand back a list here.
        collected = collected + self.get_dc(element, role)
    return u', '.join(collected)
def get_title(self, element):
    """Return the ``title`` metadata value, asking ``get_dc`` for a single result."""
    title = self.get_dc(element, 'title', True)
    return title
121 def handle_utwor(self, element):
124 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
126 \\documentclass[%s]{wl}
127 \\usepackage{style}''' % self.options['customization_str'],
128 self.options['has_cover'] and '\usepackage{makecover}',
129 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
130 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
131 (self.options['morefloats'] == 'none' and
132 u'''\\IfFileExists{morefloats.sty}{
133 \\usepackage{morefloats}
135 u'''\\def\\authors{%s}''' % self.get_authors(element),
136 u'''\\author{\\authors}''',
137 u'''\\title{%s}''' % self.get_title(element),
138 u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
139 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
142 return u"".join(filter(None, lines)), u'</TeXML>'
146 def handle_powiesc(self, element):
148 <env name="document">
149 <cmd name="maketitle"/>
def handle_texcommand(self, element):
    """Wrap the element in the TeX command derived from its tag.

    The command name is obtained from ``functions.texcommand(element.tag)``.
    Returns an (opening, closing) pair of TeXML markup strings.
    """
    # Named tex_name so the module-level ``cmd`` factory is not shadowed.
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
161 handle_akap_dialog = \
162 handle_akap_dialog = \
163 handle_autor_utworu = \
165 handle_didaskalia = \
166 handle_didask_tekst = \
167 handle_dlugi_cytat = \
168 handle_dzielo_nadrzedne = \
169 handle_lista_osoba = \
171 handle_miejsce_czas = \
173 handle_motto_podpis = \
174 handle_naglowek_akt = \
175 handle_naglowek_czesc = \
176 handle_naglowek_listy = \
177 handle_naglowek_osoba = \
178 handle_naglowek_podrozdzial = \
179 handle_naglowek_podrozdzial = \
180 handle_naglowek_rozdzial = \
181 handle_naglowek_rozdzial = \
182 handle_naglowek_scena = \
183 handle_nazwa_utworu = \
189 handle_poezja_cyt = \
192 handle_sekcja_asterysk = \
193 handle_sekcja_swiatlo = \
194 handle_separator_linia = \
195 handle_slowo_obce = \
197 handle_tytul_dziela = \
198 handle_wyroznienie = \
201 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Render a stanza element with the ``strofa`` option switched on.

    Sets ``self.options`` to ``{'strofa': True}`` and then delegates to
    the ``_handle_strofa`` command handler, returning its result.

    NOTE(review): the ``options`` setter is not visible here; presumably
    it layers this dict over the current options rather than replacing
    them -- confirm against Xmill.
    """
    self.options = {'strofa': True}
    rendered = self._handle_strofa(element)
    return rendered
207 def handle_aktywnosc(self, element):
208 self.activity_counter += 1
211 'activity_counter': self.activity_counter,
214 submill = EduModule(self.options)
216 opis = submill.generate(element.xpath('opis')[0])
218 n = element.xpath('wskazowki')
219 if n: wskazowki = submill.generate(n[0])
222 n = element.xpath('pomoce')
224 if n: pomoce = submill.generate(n[0])
227 forma = ''.join(element.xpath('forma/text()'))
229 czas = ''.join(element.xpath('czas/text()'))
231 counter = self.activity_counter
235 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
236 <cmd name="activityinfo"><parm>
237 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
238 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
239 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
248 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
249 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Return the ``("Pomoce: ", "")`` pair: a label as the opening part, nothing as the closing part."""
    label, closing = "Pomoce: ", ""
    return label, closing
255 def handle_czas(self, *_):
258 def handle_forma(self, *_):
261 def handle_lista(self, element, attrs={}):
262 if not element.findall("punkt"):
264 ltype = element.attrib.get('typ', 'punkt')
265 if ltype == 'slowniczek':
266 surl = element.attrib.get('src', None)
268 # print '** missing src on <slowniczek>, setting default'
269 surl = 'http://edukacjamedialna.edu.pl/slowniczek'
272 sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
273 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
275 listcmd = {'num': 'enumerate',
278 'slowniczek': 'itemize',
279 'czytelnia': 'itemize'}[ltype]
281 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Return the markup pair for a list item: a TeXML ``item`` command and an empty closing part."""
    item_cmd = '<cmd name="item"/>'
    return item_cmd, ''
286 def handle_cwiczenie(self, element):
287 exercise_handlers = {
289 'uporzadkuj': Uporzadkuj,
292 'przyporzadkuj': Przyporzadkuj,
293 'prawdafalsz': PrawdaFalsz
296 typ = element.attrib['typ']
297 self.exercise_counter += 1
298 if not typ in exercise_handlers:
299 return '(no handler)'
300 self.options = {'exercise_counter': self.exercise_counter}
301 handler = exercise_handlers[typ](self.options)
302 return handler.generate(element)
304 # XXX this is copied from pyhtml.py, except for return and
305 # should be refactored for no code duplication
306 def handle_definiendum(self, element):
307 nxt = element.getnext()
310 # let's pull definiens from another document
311 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
312 sxml = self.options['slowniczek_xml']
313 assert element.text != ''
314 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
316 definiens = defloc[0].getnext()
317 if definiens.tag == 'definiens':
318 subgen = EduModule(self.options)
319 definiens_s = subgen.generate(definiens)
321 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
323 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Return the opening and closing markup of a TeXML ``figure`` environment."""
    opening = u'<env name="figure">'
    closing = u'</env>'
    return opening, closing
329 def handle_tabela(self, element):
331 for w in element.xpath("wiersz"):
333 if max_col < len(ks):
335 self.options = {'columnts': max_col}
337 # has_frames = int(element.attrib.get("ramki", "0"))
338 # if has_frames: frames_c = "framed"
339 # else: frames_c = ""
340 # return u"""<table class="%s">""" % frames_c, u"</table>"
342 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
343 ''' % ('l' * max_col), \
344 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Return the markup pair for a table row: nothing before it, a ``\\`` control sequence after it."""
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
351 def handle_kol(self, element):
352 if element.getnext() is not None:
353 return u"", u'<spec cat="align" />'
def handle_link(self, element):
    """Pick the TeX command used to render a link element.

    * no (or empty) ``url`` attribute -> ``em`` command;
    * ``url`` equal to the element's text -> ``url`` command;
    * otherwise -> ``href`` command with the URL as its parameter.
    """
    url = element.attrib.get('url')
    if not url:
        return cmd('em')(self, element)
    if url == element.text:
        return cmd('url')(self, element)
    return cmd('href', parms=[url])(self, element)
def handle_obraz(self, element):
    """Register the named image as an attachment and emit an ``obraz`` command.

    The image is fetched from the output format object stored under
    ``self.options['format']`` using the stripped ``nazwa`` attribute,
    and stored in its ``attachments`` under ``obraz/<name>`` with every
    underscore removed from the name.
    """
    output_format = self.options['format']
    image_name = element.attrib['nazwa'].strip()
    attachment_path = "obraz/%s" % image_name.replace("_", "")
    output_format.attachments[attachment_path] = output_format.get_image(image_name)
    return cmd("obraz", parms=[attachment_path])(self)
374 def handle_video(self, element):
375 url = element.attrib.get('url')
377 print '!! <video> missing url'
379 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
381 print '!! unknown <video> url scheme:', url
384 thumb = IOFile.from_string(urlopen
385 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
386 img_path = "video/%s.jpg" % name.replace("_", "")
387 self.options['format'].attachments[img_path] = thumb
388 canon_url = "https://www.youtube.com/watch?v=%s" % name
389 return cmd("video", parms=[img_path, canon_url])(self)
392 class Exercise(EduModule):
393 def __init__(self, *args, **kw):
394 self.question_counter = 0
395 super(Exercise, self).__init__(*args, **kw)
397 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
399 def handle_cwiczenie(self, element):
401 'exercise': element.attrib['typ'],
404 self.question_counter = 0
405 self.piece_counter = 0
407 header = etree.Element("parm")
408 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
409 header_cmd.append(header)
410 header.text = u"Zadanie %d." % self.options['exercise_counter']
412 pre = etree.tostring(header_cmd, encoding=unicode)
414 # Add a single <pytanie> tag if it's not there
415 if not element.xpath(".//pytanie"):
416 qpre, qpost = self.handle_pytanie(element)
421 def handle_pytanie(self, element):
422 """This will handle <cwiczenie> element, when there is no <pytanie>
424 self.question_counter += 1
425 self.piece_counter = 0
427 if self.options['teacher'] and element.attrib.get('rozw'):
428 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
431 def handle_punkt(self, element):
432 pre, post = super(Exercise, self).handle_punkt(element)
433 if self.options['teacher'] and element.attrib.get('rozw'):
434 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
437 def solution_header(self):
438 par = etree.Element("cmd", name="par")
439 parm = etree.Element("parm")
440 parm.text = u"Rozwiązanie:"
442 return etree.tostring(par)
444 def explicit_solution(self):
445 if self.options['solution']:
446 par = etree.Element("cmd", name="par")
447 parm = etree.Element("parm")
448 parm.text = self.options['solution']
450 return self.solution_header() + etree.tostring(par)
454 class Wybor(Exercise):
455 def handle_cwiczenie(self, element):
456 pre, post = super(Wybor, self).handle_cwiczenie(element)
457 is_single_choice = True
458 pytania = element.xpath(".//pytanie")
462 solutions = re.split(r"[, ]+", p.attrib['rozw'])
463 if len(solutions) != 1:
464 is_single_choice = False
466 choices = p.xpath(".//*[@nazwa]")
468 for n in choices: uniq.add(n.attrib['nazwa'])
469 if len(choices) != len(uniq):
470 is_single_choice = False
473 self.options = {'single': is_single_choice}
476 def handle_punkt(self, element):
477 if self.options['exercise'] and element.attrib.get('nazwa', None):
478 cmd = 'radio' if self.options['single'] else 'checkbox'
479 return u'<cmd name="%s"/>' % cmd, ''
481 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type."""

    def handle_pytanie(self, element):
        """Handle a question exactly as the base ``Exercise`` does.

        No ordering-specific processing happens here; the items' ``rozw``
        attributes are not consulted.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
490 class Przyporzadkuj(Exercise):
491 def handle_lista(self, lista):
492 header = etree.Element("parm")
493 header_cmd = etree.Element("cmd", name="par")
494 header_cmd.append(header)
495 if 'nazwa' in lista.attrib:
496 header.text = u"Kategorie:"
497 elif 'cel' in lista.attrib:
498 header.text = u"Elementy do przyporządkowania:"
500 header.text = u"Lista:"
501 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
502 pre = etree.tostring(header_cmd, encoding=unicode) + pre
506 class Luki(Exercise):
def find_pieces(self, question):
    """Return every ``luka`` descendant of the question element."""
    gaps = question.xpath(".//luka")
    return gaps
510 def solution(self, piece):
511 piece = deepcopy(piece)
514 return sub.generate(piece)
516 def handle_pytanie(self, element):
517 qpre, qpost = super(Luki, self).handle_pytanie(element)
519 luki = self.find_pieces(element)
521 self.words = u"<env name='itemize'>%s</env>" % (
522 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
def handle_opis(self, element):
    """Return an empty opening part and ``self.words`` as the closing part."""
    closing = self.words
    return '', closing
529 def handle_luka(self, element):
531 if self.options['teacher']:
532 piece = deepcopy(element)
535 text = sub.generate(piece)
536 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return every ``zastap`` descendant of the question element."""
    replaceable = question.xpath(".//zastap")
    return replaceable
def solution(self, piece):
    """Return the piece's ``rozw`` attribute; a missing attribute propagates the lookup error."""
    attributes = piece.attrib
    return attributes['rozw']
def list_header(self):
    """Return the fixed heading text for the list of elements to insert."""
    heading = u"Elementy do wstawienia"
    return heading
550 def handle_zastap(self, element):
551 piece = deepcopy(element)
554 text = sub.generate(piece)
555 if self.options['teacher'] and element.attrib.get('rozw'):
556 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
560 class PrawdaFalsz(Exercise):
561 def handle_punkt(self, element):
562 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
563 if 'rozw' in element.attrib:
564 post += u" [Prawda/Fałsz]"
570 lists = tree.xpath(".//lista")
575 if p.tail is None: p.tail = ''
579 if p.text is None: p.text = ''
585 class EduModulePDFFormat(PDFFormat):
587 self.attachments = {}
589 'provider': self.wldoc.provider,
591 "teacher": self.customization.get('teacher'),
593 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
595 open("/tmp/texml.xml", "w").write(texml)
598 def get_tex_dir(self):
599 temp = super(EduModulePDFFormat, self).get_tex_dir()
600 for name, iofile in self.attachments.items():
601 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Look up attachment ``name`` in the source attachments of ``self.wldoc``.

    A missing name propagates the lookup error of the attachments mapping.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]