1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from __future__ import with_statement
13 from copy import deepcopy
17 from StringIO import StringIO
18 from tempfile import mkdtemp, NamedTemporaryFile
21 from copy import deepcopy
22 from subprocess import call, PIPE
23 from urllib2 import urlopen
25 from Texml.processor import process
26 from lxml import etree
27 from lxml.etree import XMLSyntaxError, XSLTApplyError
29 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
30 from librarian.dcparser import Person
31 from librarian.parser import WLDocument
32 from librarian import ParseError, DCNS, get_resource, IOFile, Format
33 from librarian import functions
34 from pdf import PDFFormat
40 def _wrap(*args, **kw):
41 value = f(*args, **kw)
43 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
45 if isinstance(value, list):
46 import pdb; pdb.set_trace()
47 if isinstance(value, tuple):
48 return prefix + value[0], value[1] + postfix
50 return prefix + value + postfix
55 def cmd(name, parms=None):
56 def wrap(self, element=None):
57 pre, post = tag_open_close('cmd', name=name)
61 e = etree.Element("parm")
63 pre += etree.tostring(e)
64 if element is not None:
66 post = "</parm>" + post
73 def mark_alien_characters(text):
74 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
78 class EduModule(Xmill):
79 def __init__(self, options=None):
80 super(EduModule, self).__init__(options)
81 self.activity_counter = 0
82 self.exercise_counter = 0
84 def swap_endlines(txt):
85 if self.options['strofa']:
86 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
88 self.register_text_filter(functions.substitute_entities)
89 self.register_text_filter(mark_alien_characters)
90 self.register_text_filter(swap_endlines)
92 def get_dc(self, element, dc_field, single=False):
93 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
98 def handle_rdf__RDF(self, _):
99 "skip metadata in generation"
103 def get_rightsinfo(self, element):
104 rights_lic = self.get_dc(element, 'rights.license', True)
105 return u'<cmd name="rightsinfostr">' + \
106 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
107 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
111 def get_authors(self, element, which=None):
112 dc = self.options['wldoc'].book_info
114 authors = dc.authors_textbook + \
115 dc.authors_scenario + \
118 authors = getattr(dc, "authors_%s" % which)
119 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the ``title`` Dublin Core value for *element*.

    Delegates to ``self.get_dc`` with the ``single`` argument set to
    ``True`` and hands its result back unchanged.
    """
    title = self.get_dc(element, 'title', True)
    return title
125 def handle_utwor(self, element):
128 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
130 \\documentclass[%s]{wl}
131 \\usepackage{style}''' % self.options['customization_str'],
132 self.options['has_cover'] and '\usepackage{makecover}',
133 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
134 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
135 (self.options['morefloats'] == 'none' and
136 u'''\\IfFileExists{morefloats.sty}{
137 \\usepackage{morefloats}
139 u'''\\def\\authors{%s}''' % self.get_authors(element),
140 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
141 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
142 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
144 u'''\\author{\\authors}''',
145 u'''\\title{%s}''' % self.get_title(element),
146 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
147 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
150 return u"".join(filter(None, lines)), u'</TeXML>'
154 def handle_powiesc(self, element):
156 <env name="document">
157 <cmd name="maketitle"/>
158 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element's content in a TeXML command named after its tag.

    The command name comes from ``functions.texcommand(element.tag)``.
    Returns an ``(opening, closing)`` pair of TeXML fragments.
    """
    # Called ``tex_name`` so the module-level ``cmd`` helper is not shadowed.
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
169 handle_akap_dialog = \
170 handle_akap_dialog = \
171 handle_autor_utworu = \
173 handle_didaskalia = \
174 handle_didask_tekst = \
175 handle_dlugi_cytat = \
176 handle_dzielo_nadrzedne = \
177 handle_lista_osoba = \
179 handle_miejsce_czas = \
181 handle_motto_podpis = \
182 handle_naglowek_akt = \
183 handle_naglowek_czesc = \
184 handle_naglowek_listy = \
185 handle_naglowek_osoba = \
186 handle_naglowek_podrozdzial = \
187 handle_naglowek_podrozdzial = \
188 handle_naglowek_rozdzial = \
189 handle_naglowek_rozdzial = \
190 handle_naglowek_scena = \
191 handle_nazwa_utworu = \
197 handle_poezja_cyt = \
200 handle_sekcja_asterysk = \
201 handle_sekcja_swiatlo = \
202 handle_separator_linia = \
203 handle_slowo_obce = \
205 handle_tytul_dziela = \
206 handle_wyroznienie = \
209 def handle_uwaga(self, _e):
211 def handle_extra(self, _e):
214 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Flag the ``strofa`` option, then render through ``_handle_strofa``.

    ``self.options`` is assigned ``{'strofa': True}`` before delegating;
    the ``swap_endlines`` text filter reads that key.
    NOTE(review): presumably ``options`` is a property that layers the new
    values over the existing ones -- confirm in ``Xmill``.
    """
    strofa_flag = {'strofa': True}
    self.options = strofa_flag
    render = self._handle_strofa
    return render(element)
220 def handle_aktywnosc(self, element):
221 self.activity_counter += 1
224 'activity_counter': self.activity_counter,
227 submill = EduModule(self.options)
229 opis = submill.generate(element.xpath('opis')[0])
231 n = element.xpath('wskazowki')
232 if n: wskazowki = submill.generate(n[0])
235 n = element.xpath('pomoce')
237 if n: pomoce = submill.generate(n[0])
240 forma = ''.join(element.xpath('forma/text()'))
242 czas = ''.join(element.xpath('czas/text()'))
244 counter = self.activity_counter
247 <cmd name="noindent" />
248 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
249 <cmd name="activityinfo"><parm>
250 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
251 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
252 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
261 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
262 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Put the "Pomoce: " label before the element's content.

    NOTE(review): wrapped by ``ifoption(sub_gen=True)``; presumably the
    handler only applies while the ``sub_gen`` option is set -- confirm
    against ``xmlutils.ifoption``.
    """
    label = "Pomoce: "
    trailer = ""
    return label, trailer
268 def handle_czas(self, *_):
271 def handle_forma(self, *_):
274 def handle_lista(self, element, attrs={}):
275 if not element.findall("punkt"):
277 ltype = element.attrib.get('typ', 'punkt')
278 if ltype == 'slowniczek':
279 surl = element.attrib.get('src', None)
281 # print '** missing src on <slowniczek>, setting default'
282 surl = 'http://edukacjamedialna.edu.pl/slowniczek'
285 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
286 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
288 listcmd = {'num': 'enumerate',
291 'slowniczek': 'itemize',
292 'czytelnia': 'itemize'}[ltype]
294 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Emit the ``item`` command before the element's content.

    Returns an ``(opening, closing)`` pair; the closing part is empty.
    """
    item_command = '<cmd name="item"/>'
    return (item_command, '')
299 def handle_cwiczenie(self, element):
300 exercise_handlers = {
302 'uporzadkuj': Uporzadkuj,
305 'przyporzadkuj': Przyporzadkuj,
306 'prawdafalsz': PrawdaFalsz
309 typ = element.attrib['typ']
310 self.exercise_counter += 1
311 if not typ in exercise_handlers:
312 return '(no handler)'
313 self.options = {'exercise_counter': self.exercise_counter}
314 handler = exercise_handlers[typ](self.options)
315 return handler.generate(element)
317 # XXX this is copied from pyhtml.py, except for return and
318 # should be refactored for no code duplication
319 def handle_definiendum(self, element):
320 nxt = element.getnext()
323 # let's pull definiens from another document
324 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
325 sxml = self.options['slowniczek_xml']
326 assert element.text != ''
327 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
329 definiens = defloc[0].getnext()
330 if definiens.tag == 'definiens':
331 subgen = EduModule(self.options)
332 definiens_s = subgen.generate(definiens)
334 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
336 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a TeXML ``figure`` environment.

    Returns the ``(opening, closing)`` pair of environment tags.
    """
    opening = u"""<env name="figure">"""
    closing = u"</env>"
    return opening, closing
342 def handle_tabela(self, element):
344 for w in element.xpath("wiersz"):
346 if max_col < len(ks):
348 self.options = {'columnts': max_col}
350 # has_frames = int(element.attrib.get("ramki", "0"))
351 # if has_frames: frames_c = "framed"
352 # else: frames_c = ""
353 # return u"""<table class="%s">""" % frames_c, u"</table>"
355 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
356 ''' % ('l' * max_col), \
357 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Close a ``wiersz`` element with a TeXML line-break control.

    Nothing is emitted before the content; after it comes a ``ctrl``
    element whose ``ch`` attribute is a single backslash.
    """
    row_end = u'<ctrl ch="\\"/>'
    return (u"", row_end)
364 def handle_kol(self, element):
365 if element.getnext() is not None:
366 return u"", u'<spec cat="align" />'
369 def handle_link(self, element):
370 if element.attrib.get('url'):
371 url = element.attrib.get('url')
372 if url == element.text:
373 return cmd('url')(self, element)
375 return cmd('href', parms=[element.attrib['url']])(self, element)
377 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Attach the image named by ``nazwa`` and emit the ``obraz`` command.

    The image is fetched from the format object stored under
    ``self.options['format']`` and registered in its ``attachments``
    under ``obraz/<name>``, where ``<name>`` has underscores removed.
    Returns whatever the ``cmd("obraz", ...)`` wrapper returns for ``self``.
    """
    output_format = self.options['format']
    # Stripped once; the lookup uses this name, the path drops underscores.
    image_name = element.attrib['nazwa'].strip()
    attachment_path = "obraz/%s" % image_name.replace("_", "")
    output_format.attachments[attachment_path] = \
        output_format.get_image(image_name)
    render = cmd("obraz", parms=[attachment_path])
    return render(self)
387 def handle_video(self, element):
388 url = element.attrib.get('url')
390 print '!! <video> missing url'
392 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
394 print '!! unknown <video> url scheme:', url
397 thumb = IOFile.from_string(urlopen
398 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
399 img_path = "video/%s.jpg" % name.replace("_", "")
400 self.options['format'].attachments[img_path] = thumb
401 canon_url = "https://www.youtube.com/watch?v=%s" % name
402 return cmd("video", parms=[img_path, canon_url])(self)
405 class Exercise(EduModule):
def __init__(self, *args, **kw):
    """Set up the question counter and initialise the base class.

    All positional and keyword arguments are forwarded unchanged to the
    parent ``__init__``.
    """
    # The counter is assigned before the parent initialiser runs.
    self.question_counter = 0
    super(Exercise, self).__init__(*args, **kw)
410 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
412 def handle_cwiczenie(self, element):
414 'exercise': element.attrib['typ'],
417 self.question_counter = 0
418 self.piece_counter = 0
420 header = etree.Element("parm")
421 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
422 header_cmd.append(header)
423 header.text = u"Zadanie %d." % self.options['exercise_counter']
425 pre = etree.tostring(header_cmd, encoding=unicode)
427 # Add a single <pytanie> tag if it's not there
428 if not element.xpath(".//pytanie"):
429 qpre, qpost = self.handle_pytanie(element)
434 def handle_pytanie(self, element):
435 """This will handle <cwiczenie> element, when there is no <pytanie>
437 self.question_counter += 1
438 self.piece_counter = 0
440 if self.options['teacher'] and element.attrib.get('rozw'):
441 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
444 def handle_punkt(self, element):
445 pre, post = super(Exercise, self).handle_punkt(element)
446 if self.options['teacher'] and element.attrib.get('rozw'):
447 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
450 def solution_header(self):
451 par = etree.Element("cmd", name="par")
452 parm = etree.Element("parm")
453 parm.text = u"Rozwiązanie:"
455 return etree.tostring(par)
457 def explicit_solution(self):
458 if self.options['solution']:
459 par = etree.Element("cmd", name="par")
460 parm = etree.Element("parm")
461 parm.text = self.options['solution']
463 return self.solution_header() + etree.tostring(par)
467 class Wybor(Exercise):
468 def handle_cwiczenie(self, element):
469 pre, post = super(Wybor, self).handle_cwiczenie(element)
470 is_single_choice = True
471 pytania = element.xpath(".//pytanie")
475 solutions = re.split(r"[, ]+", p.attrib['rozw'])
476 if len(solutions) != 1:
477 is_single_choice = False
479 choices = p.xpath(".//*[@nazwa]")
481 for n in choices: uniq.add(n.attrib['nazwa'])
482 if len(choices) != len(uniq):
483 is_single_choice = False
486 self.options = {'single': is_single_choice}
489 def handle_punkt(self, element):
490 if self.options['exercise'] and element.attrib.get('nazwa', None):
491 cmd = 'radio' if self.options['single'] else 'checkbox'
492 return u'<cmd name="%s"/>' % cmd, ''
494 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type."""

    def handle_pytanie(self, element):
        """Render a question by delegating to ``Exercise.handle_pytanie``.

        An earlier version ran the XPath query ``.//punkt/@rozw`` here and
        discarded the result; that unused query was removed because it
        only cost a tree traversal per question.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
503 class Przyporzadkuj(Exercise):
504 def handle_lista(self, lista):
505 header = etree.Element("parm")
506 header_cmd = etree.Element("cmd", name="par")
507 header_cmd.append(header)
508 if 'nazwa' in lista.attrib:
509 header.text = u"Kategorie:"
510 elif 'cel' in lista.attrib:
511 header.text = u"Elementy do przyporządkowania:"
513 header.text = u"Lista:"
514 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
515 pre = etree.tostring(header_cmd, encoding=unicode) + pre
519 class Luki(Exercise):
def find_pieces(self, question):
    """Return the ``luka`` descendants of *question*.

    The result is whatever ``question.xpath(".//luka")`` yields.
    """
    query = ".//luka"
    return question.xpath(query)
523 def solution(self, piece):
524 piece = deepcopy(piece)
527 return sub.generate(piece)
529 def handle_pytanie(self, element):
530 qpre, qpost = super(Luki, self).handle_pytanie(element)
532 luki = self.find_pieces(element)
534 self.words = u"<env name='itemize'>%s</env>" % (
535 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
def handle_opis(self, element):
    """Append the prepared ``self.words`` fragment after the element.

    Nothing is emitted before the content.  ``self.words`` is set by
    ``handle_pytanie``.
    """
    trailing = self.words
    return ('', trailing)
542 def handle_luka(self, element):
544 if self.options['teacher']:
545 piece = deepcopy(element)
548 text = sub.generate(piece)
549 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return the ``zastap`` descendants of *question*.

    The result is whatever ``question.xpath(".//zastap")`` yields.
    """
    query = ".//zastap"
    return question.xpath(query)
def solution(self, piece):
    """Return the value of the ``rozw`` attribute of *piece*.

    Raises ``KeyError`` (from the attribute lookup) when it is absent.
    """
    attributes = piece.attrib
    return attributes['rozw']
def list_header(self):
    """Return the fixed heading text for the list of elements."""
    heading = u"Elementy do wstawienia"
    return heading
563 def handle_zastap(self, element):
564 piece = deepcopy(element)
567 text = sub.generate(piece)
568 if self.options['teacher'] and element.attrib.get('rozw'):
569 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
573 class PrawdaFalsz(Exercise):
574 def handle_punkt(self, element):
575 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
576 if 'rozw' in element.attrib:
577 post += u" [Prawda/Fałsz]"
583 lists = tree.xpath(".//lista")
588 if p.tail is None: p.tail = ''
592 if p.text is None: p.text = ''
598 class EduModulePDFFormat(PDFFormat):
599 style = get_resource('res/styles/edumed/pdf/edumed.sty')
602 self.attachments = {}
606 "teacher": self.customization.get('teacher'),
608 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
610 open("/tmp/texml.xml", "w").write(texml)
613 def get_tex_dir(self):
614 temp = super(EduModulePDFFormat, self).get_tex_dir()
615 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
616 for name, iofile in self.attachments.items():
617 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the attachment stored under *name* on the document source.

    Looks the name up in ``self.wldoc.source.attachments``; an unknown
    name raises whatever that container raises on a failed subscript.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]