1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
89 def handle_rdf__RDF(self, _):
90 "skip metadata in generation"
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + \
97 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
98 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
102 def get_authors(self, element, which=None):
103 dc = self.options['wldoc'].book_info
105 authors = dc.authors_textbook + \
106 dc.authors_scenario + \
109 authors = getattr(dc, "authors_%s" % which)
110 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the document title taken from the Dublin Core metadata.

    Delegates to ``get_dc`` for the ``title`` field, passing ``True`` as
    the ``single`` argument.
    """
    title = self.get_dc(element, 'title', True)
    return title
116 def handle_utwor(self, element):
119 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
121 \\documentclass[%s]{wl}
122 \\usepackage{style}''' % self.options['customization_str'],
123 self.options['has_cover'] and '\usepackage{makecover}',
124 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
125 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
126 (self.options['morefloats'] == 'none' and
127 u'''\\IfFileExists{morefloats.sty}{
128 \\usepackage{morefloats}
130 u'''\\def\\authors{%s}''' % self.get_authors(element),
131 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
132 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
133 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
135 u'''\\author{\\authors}''',
136 u'''\\title{%s}''' % self.get_title(element),
137 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
138 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
141 return u"".join(filter(None, lines)), u'</TeXML>'
145 def handle_powiesc(self, element):
147 <env name="document">
148 <cmd name="maketitle"/>
149 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap an element in a TeXML command derived from its tag.

    The command name comes from ``functions.texcommand(element.tag)``.
    Returns an ``(opening, closing)`` pair of TeXML markup; the opening
    part starts an escaped ``<TeXML>`` block holding ``<cmd>``/``<parm>``
    and the closing part ends all three.
    """
    # Named ``command_name`` so the module-level ``cmd`` helper stays visible.
    command_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
160 handle_akap_dialog = \
161 handle_akap_dialog = \
162 handle_autor_utworu = \
164 handle_didaskalia = \
165 handle_didask_tekst = \
166 handle_dlugi_cytat = \
167 handle_dzielo_nadrzedne = \
168 handle_lista_osoba = \
170 handle_miejsce_czas = \
172 handle_motto_podpis = \
173 handle_naglowek_akt = \
174 handle_naglowek_czesc = \
175 handle_naglowek_listy = \
176 handle_naglowek_osoba = \
177 handle_naglowek_scena = \
178 handle_nazwa_utworu = \
184 handle_poezja_cyt = \
187 handle_sekcja_asterysk = \
188 handle_sekcja_swiatlo = \
189 handle_separator_linia = \
190 handle_slowo_obce = \
192 handle_tytul_dziela = \
193 handle_wyroznienie = \
197 def handle_naglowek_rozdzial(self, element):
198 if not self.options['teacher']:
199 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek')):
200 self.state['mute'] = False
202 self.state['mute'] = True
204 return self.handle_texcommand(element)
205 handle_naglowek_rozdzial.unmuter = True
def handle_naglowek_podrozdzial(self, element):
    """Handle a subchapter heading.

    Resets ``activity_counter`` to zero, then renders the heading with
    the generic ``handle_texcommand`` and returns its result.
    """
    # Activity numbering starts over at each subchapter heading.
    self.activity_counter = 0
    rendered = self.handle_texcommand(element)
    return rendered
211 def handle_uwaga(self, _e):
213 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Return the TeXML ``spec`` element of category ``tilde``.

    The element argument is ignored.
    NOTE(review): presumably TeXML renders this as a TeX tie (``~``) - confirm.
    """
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
219 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Handle a stanza element.

    Assigns ``{'strofa': True}`` to ``self.options`` and then returns
    whatever the ``strofa`` command handler (``_handle_strofa``) produces.
    NOTE(review): presumably the ``options`` setter merges this into the
    current options rather than replacing them - confirm against Xmill.
    """
    stanza_options = {'strofa': True}
    self.options = stanza_options
    return self._handle_strofa(element)
225 def handle_aktywnosc(self, element):
226 self.activity_counter += 1
229 'activity_counter': self.activity_counter,
232 submill = EduModule(self.options, self.state)
234 if element.xpath('opis'):
235 opis = submill.generate(element.xpath('opis')[0])
239 n = element.xpath('wskazowki')
240 if n: wskazowki = submill.generate(n[0])
243 n = element.xpath('pomoce')
245 if n: pomoce = submill.generate(n[0])
248 forma = ''.join(element.xpath('forma/text()'))
250 czas = ''.join(element.xpath('czas/text()'))
252 counter = self.activity_counter
254 if element.getnext().tag == 'aktywnosc' or self.activity_last.getnext() == element:
255 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
259 self.activity_last = element
262 <cmd name="noindent" />
264 <cmd name="activityinfo"><parm>
265 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
266 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
267 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
276 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
277 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Prefix the element's content with the label ``Pomoce: ``.

    Returns an ``(opening, closing)`` pair; the closing part is empty.
    NOTE(review): the ``ifoption(sub_gen=True)`` guard presumably limits
    this handler to sub-generation runs - confirm in xmlutils.
    """
    opening = "Pomoce: "
    closing = ""
    return opening, closing
283 def handle_czas(self, *_):
286 def handle_forma(self, *_):
289 def handle_lista(self, element, attrs={}):
290 ltype = element.attrib.get('typ', 'punkt')
291 if not element.findall("punkt"):
292 if ltype == 'czytelnia':
293 return 'W przygotowaniu.'
296 if ltype == 'slowniczek':
297 surl = element.attrib.get('src', None)
299 # print '** missing src on <slowniczek>, setting default'
300 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
303 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
304 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
306 listcmd = {'num': 'enumerate',
309 'slowniczek': 'itemize',
310 'czytelnia': 'itemize'}[ltype]
312 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Render a list item.

    Returns an ``(opening, closing)`` pair: an ``item`` command before
    the content and nothing after it. The element itself is not inspected.
    """
    opening = '<cmd name="item"/>'
    closing = ''
    return opening, closing
317 def handle_cwiczenie(self, element):
318 exercise_handlers = {
320 'uporzadkuj': Uporzadkuj,
323 'przyporzadkuj': Przyporzadkuj,
324 'prawdafalsz': PrawdaFalsz
327 typ = element.attrib['typ']
328 self.exercise_counter += 1
329 if not typ in exercise_handlers:
330 return '(no handler)'
331 self.options = {'exercise_counter': self.exercise_counter}
332 handler = exercise_handlers[typ](self.options, self.state)
333 return handler.generate(element)
335 # XXX this is copied from pyhtml.py, except for return and
336 # should be refactored for no code duplication
337 def handle_definiendum(self, element):
338 nxt = element.getnext()
341 # let's pull definiens from another document
342 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
343 sxml = self.options['slowniczek_xml']
344 assert element.text != ''
345 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
347 definiens = defloc[0].getnext()
348 if definiens.tag == 'definiens':
349 subgen = EduModule(self.options, self.state)
350 definiens_s = subgen.generate(definiens)
352 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
354 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a TeXML ``figure`` environment.

    Returns the ``(opening, closing)`` markup pair; the element itself
    is not inspected.
    """
    opening = u"""<env name="figure">"""
    closing = u"</env>"
    return opening, closing
360 def handle_tabela(self, element):
362 for w in element.xpath("wiersz"):
364 if max_col < len(ks):
366 self.options = {'columnts': max_col}
368 # has_frames = int(element.attrib.get("ramki", "0"))
369 # if has_frames: frames_c = "framed"
370 # else: frames_c = ""
371 # return u"""<table class="%s">""" % frames_c, u"</table>"
373 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
374 ''' % ('l' * max_col), \
375 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Render a table row.

    Nothing is emitted before the row; a ``ctrl`` element with a
    backslash character is emitted after it.
    NOTE(review): presumably this yields the TeX row terminator - confirm.
    """
    opening = u""
    closing = u'<ctrl ch="\\"/>'
    return opening, closing
382 def handle_kol(self, element):
383 if element.getnext() is not None:
384 return u"", u'<spec cat="align" />'
387 def handle_link(self, element):
388 if element.attrib.get('url'):
389 url = element.attrib.get('url')
390 if url == element.text:
391 return cmd('url')(self, element)
393 return cmd('href', parms=[element.attrib['url']])(self, element)
395 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Attach the referenced image and emit an ``obraz`` command for it.

    Reads the image name from the ``nazwa`` attribute (stripped of
    surrounding whitespace), fetches the image through the output format
    stored in ``options['format']``, registers it in that format's
    ``attachments`` under ``obraz/<name without underscores>`` and
    returns the ``obraz`` command parametrised with that path.
    """
    output_format = self.options['format']
    image_name = element.attrib.get('nazwa', '').strip()
    # Underscores are dropped from the attachment path only; the lookup
    # itself uses the name as written.
    target_path = "obraz/%s" % image_name.replace("_", "")
    output_format.attachments[target_path] = output_format.get_image(image_name)
    return cmd("obraz", parms=[target_path])(self)
405 def handle_video(self, element):
406 url = element.attrib.get('url')
408 print '!! <video> missing url'
410 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
412 print '!! unknown <video> url scheme:', url
415 thumb = IOFile.from_string(urlopen
416 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
417 img_path = "video/%s.jpg" % name.replace("_", "")
418 self.options['format'].attachments[img_path] = thumb
419 canon_url = "https://www.youtube.com/watch?v=%s" % name
420 return cmd("video", parms=[img_path, canon_url])(self)
423 class Exercise(EduModule):
def __init__(self, *args, **kw):
    """Initialise the exercise handler.

    All arguments are forwarded unchanged to the parent constructor.
    """
    # The counter is set before the parent constructor runs, as in the
    # original ordering.
    self.question_counter = 0
    parent = super(Exercise, self)
    parent.__init__(*args, **kw)
428 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
430 def handle_cwiczenie(self, element):
432 'exercise': element.attrib['typ'],
435 self.question_counter = 0
436 self.piece_counter = 0
438 header = etree.Element("parm")
439 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
440 header_cmd.append(header)
441 header.text = u"Zadanie %d." % self.options['exercise_counter']
443 pre = etree.tostring(header_cmd, encoding=unicode)
445 # Add a single <pytanie> tag if it's not there
446 if not element.xpath(".//pytanie"):
447 qpre, qpost = self.handle_pytanie(element)
452 def handle_pytanie(self, element):
453 """This will handle <cwiczenie> element, when there is no <pytanie>
455 self.question_counter += 1
456 self.piece_counter = 0
458 if self.options['teacher'] and element.attrib.get('rozw'):
459 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
462 def handle_punkt(self, element):
463 pre, post = super(Exercise, self).handle_punkt(element)
464 if self.options['teacher'] and element.attrib.get('rozw'):
465 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
468 def solution_header(self):
469 par = etree.Element("cmd", name="par")
470 parm = etree.Element("parm")
471 parm.text = u"Rozwiązanie:"
473 return etree.tostring(par)
475 def explicit_solution(self):
476 if self.options['solution']:
477 par = etree.Element("cmd", name="par")
478 parm = etree.Element("parm")
479 parm.text = self.options['solution']
481 return self.solution_header() + etree.tostring(par)
485 class Wybor(Exercise):
486 def handle_cwiczenie(self, element):
487 pre, post = super(Wybor, self).handle_cwiczenie(element)
488 is_single_choice = True
489 pytania = element.xpath(".//pytanie")
493 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
494 if len(solutions) != 1:
495 is_single_choice = False
497 choices = p.xpath(".//*[@nazwa]")
499 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
500 if len(choices) != len(uniq):
501 is_single_choice = False
504 self.options = {'single': is_single_choice}
507 def handle_punkt(self, element):
508 if self.options['exercise'] and element.attrib.get('nazwa', None):
509 cmd = 'radio' if self.options['single'] else 'checkbox'
510 return u'<cmd name="%s"/>' % cmd, ''
512 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler used for the ``uporzadkuj`` exercise type."""

    def handle_pytanie(self, element):
        """Render a question exactly as the base ``Exercise`` does.

        The ``rozw`` attributes of the question's items are not consulted
        here; the previously computed list of them was never used, so the
        lookup has been dropped.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
521 class Przyporzadkuj(Exercise):
522 def handle_lista(self, lista):
523 header = etree.Element("parm")
524 header_cmd = etree.Element("cmd", name="par")
525 header_cmd.append(header)
526 if 'nazwa' in lista.attrib:
527 header.text = u"Kategorie:"
528 elif 'cel' in lista.attrib:
529 header.text = u"Elementy do przyporządkowania:"
531 header.text = u"Lista:"
532 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
533 pre = etree.tostring(header_cmd, encoding=unicode) + pre
537 class Luki(Exercise):
def find_pieces(self, question):
    """Return every ``luka`` descendant of *question* (XPath ``.//luka``)."""
    gaps = question.xpath(".//luka")
    return gaps
541 def solution(self, piece):
542 piece = deepcopy(piece)
545 return sub.generate(piece)
547 def handle_pytanie(self, element):
548 qpre, qpost = super(Luki, self).handle_pytanie(element)
550 luki = self.find_pieces(element)
552 self.words = u"<env name='itemize'>%s</env>" % (
553 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
557 def handle_opis(self, element):
558 return '', self.words
560 def handle_luka(self, element):
562 if self.options['teacher']:
563 piece = deepcopy(element)
566 text = sub.generate(piece)
567 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return every ``zastap`` descendant of *question* (XPath ``.//zastap``)."""
    replacements = question.xpath(".//zastap")
    return replacements
def solution(self, piece):
    """Return the ``rozw`` attribute of *piece*, or ``''`` when it is absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the fixed heading text ``Elementy do wstawienia``."""
    heading = u"Elementy do wstawienia"
    return heading
581 def handle_zastap(self, element):
582 piece = deepcopy(element)
585 text = sub.generate(piece)
586 if self.options['teacher'] and element.attrib.get('rozw'):
587 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
591 class PrawdaFalsz(Exercise):
592 def handle_punkt(self, element):
593 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
594 if 'rozw' in element.attrib:
595 post += u" [Prawda/Fałsz]"
601 lists = tree.xpath(".//lista")
606 if p.tail is None: p.tail = ''
610 if p.text is None: p.text = ''
616 class EduModulePDFFormat(PDFFormat):
617 style = get_resource('res/styles/edumed/pdf/edumed.sty')
620 substitute_hyphens(self.wldoc.edoc)
621 fix_hanging(self.wldoc.edoc)
623 self.attachments = {}
627 "teacher": self.customization.get('teacher'),
629 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
631 open("/tmp/texml.xml", "w").write(texml)
634 def get_tex_dir(self):
635 temp = super(EduModulePDFFormat, self).get_tex_dir()
636 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
637 for name, iofile in self.attachments.items():
638 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Look up the attachment called *name* on the document's source.

    Indexes ``self.wldoc.source.attachments`` directly, so a missing
    name propagates whatever error that container raises.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]