1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.exercise_counter = 0
74 def swap_endlines(txt):
75 if self.options['strofa']:
76 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
78 self.register_text_filter(swap_endlines)
79 self.register_text_filter(functions.substitute_entities)
80 self.register_text_filter(mark_alien_characters)
82 def get_dc(self, element, dc_field, single=False):
83 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
88 def handle_rdf__RDF(self, _):
89 "skip metadata in generation"
93 def get_rightsinfo(self, element):
94 rights_lic = self.get_dc(element, 'rights.license', True)
95 return u'<cmd name="rightsinfostr">' + \
96 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
97 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
101 def get_authors(self, element, which=None):
102 dc = self.options['wldoc'].book_info
104 authors = dc.authors_textbook + \
105 dc.authors_scenario + \
108 authors = getattr(dc, "authors_%s" % which)
109 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the value of the ``title`` Dublin Core field for *element*.

    The lookup is delegated to ``self.get_dc`` with its ``single`` flag
    set to True.
    """
    title = self.get_dc(element, 'title', True)
    return title
115 def handle_utwor(self, element):
118 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
120 \\documentclass[%s]{wl}
121 \\usepackage{style}''' % self.options['customization_str'],
122 self.options['has_cover'] and '\usepackage{makecover}',
123 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
124 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
125 (self.options['morefloats'] == 'none' and
126 u'''\\IfFileExists{morefloats.sty}{
127 \\usepackage{morefloats}
129 u'''\\def\\authors{%s}''' % self.get_authors(element),
130 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
131 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
132 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
134 u'''\\author{\\authors}''',
135 u'''\\title{%s}''' % self.get_title(element),
136 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
137 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
140 return u"".join(filter(None, lines)), u'</TeXML>'
144 def handle_powiesc(self, element):
146 <env name="document">
147 <cmd name="maketitle"/>
148 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap *element* in a TeXML command named after its tag.

    The command name is produced by ``functions.texcommand(element.tag)``.
    Returns an ``(opening, closing)`` pair of TeXML fragments.
    """
    # Named ``command_name`` so the module-level ``cmd`` helper stays visible.
    command_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
159 handle_akap_dialog = \
160 handle_akap_dialog = \
161 handle_autor_utworu = \
163 handle_didaskalia = \
164 handle_didask_tekst = \
165 handle_dlugi_cytat = \
166 handle_dzielo_nadrzedne = \
167 handle_lista_osoba = \
169 handle_miejsce_czas = \
171 handle_motto_podpis = \
172 handle_naglowek_akt = \
173 handle_naglowek_czesc = \
174 handle_naglowek_listy = \
175 handle_naglowek_osoba = \
176 handle_naglowek_podrozdzial = \
177 handle_naglowek_scena = \
178 handle_nazwa_utworu = \
184 handle_poezja_cyt = \
187 handle_sekcja_asterysk = \
188 handle_sekcja_swiatlo = \
189 handle_separator_linia = \
190 handle_slowo_obce = \
192 handle_tytul_dziela = \
193 handle_wyroznienie = \
197 def handle_naglowek_rozdzial(self, element):
198 if not self.options['teacher']:
199 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek')):
200 self.state['mute'] = False
202 self.state['mute'] = True
204 return self.handle_texcommand(element)
205 handle_naglowek_rozdzial.unmuter = True
208 def handle_uwaga(self, _e):
210 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Return the TeXML ``tilde`` spec for an ``nbsp`` element.

    The element itself is not inspected.
    """
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
216 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Render a ``strofa`` element with the ``strofa`` option turned on.

    The ``strofa`` option is what the ``swap_endlines`` text filter checks
    before rewriting line endings. The actual markup comes from
    ``self._handle_strofa``.
    """
    strofa_options = {'strofa': True}
    # NOTE(review): presumably ``options`` is a property on the base class
    # whose setter applies an override instead of replacing every option --
    # confirm in xmlutils.
    self.options = strofa_options
    rendered = self._handle_strofa(element)
    return rendered
222 def handle_aktywnosc(self, element):
223 self.activity_counter += 1
226 'activity_counter': self.activity_counter,
229 submill = EduModule(self.options, self.state)
231 if element.xpath('opis'):
232 opis = submill.generate(element.xpath('opis')[0])
236 n = element.xpath('wskazowki')
237 if n: wskazowki = submill.generate(n[0])
240 n = element.xpath('pomoce')
242 if n: pomoce = submill.generate(n[0])
245 forma = ''.join(element.xpath('forma/text()'))
247 czas = ''.join(element.xpath('czas/text()'))
249 counter = self.activity_counter
252 <cmd name="noindent" />
253 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
254 <cmd name="activityinfo"><parm>
255 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
256 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
257 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
266 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
267 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Put the "Pomoce: " label in front of the element content.

    Wrapped in ``ifoption(sub_gen=True)``; the element argument is unused.
    Returns a ``(prefix, suffix)`` pair with an empty suffix.
    """
    label, closing = "Pomoce: ", ""
    return label, closing
273 def handle_czas(self, *_):
276 def handle_forma(self, *_):
279 def handle_lista(self, element, attrs={}):
280 ltype = element.attrib.get('typ', 'punkt')
281 if not element.findall("punkt"):
282 if ltype == 'czytelnia':
283 return 'W przygotowaniu.'
286 if ltype == 'slowniczek':
287 surl = element.attrib.get('src', None)
289 # print '** missing src on <slowniczek>, setting default'
290 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
293 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
294 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
296 listcmd = {'num': 'enumerate',
299 'slowniczek': 'itemize',
300 'czytelnia': 'itemize'}[ltype]
302 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Open a list item.

    Returns a ``(prefix, suffix)`` pair: the TeXML ``item`` command and an
    empty suffix. *element* is not inspected.
    """
    item_command = '<cmd name="item"/>'
    return (item_command, '')
307 def handle_cwiczenie(self, element):
308 exercise_handlers = {
310 'uporzadkuj': Uporzadkuj,
313 'przyporzadkuj': Przyporzadkuj,
314 'prawdafalsz': PrawdaFalsz
317 typ = element.attrib['typ']
318 self.exercise_counter += 1
319 if not typ in exercise_handlers:
320 return '(no handler)'
321 self.options = {'exercise_counter': self.exercise_counter}
322 handler = exercise_handlers[typ](self.options, self.state)
323 return handler.generate(element)
325 # XXX this is copied from pyhtml.py, except for return and
326 # should be refactored for no code duplication
327 def handle_definiendum(self, element):
328 nxt = element.getnext()
331 # let's pull definiens from another document
332 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
333 sxml = self.options['slowniczek_xml']
334 assert element.text != ''
335 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
337 definiens = defloc[0].getnext()
338 if definiens.tag == 'definiens':
339 subgen = EduModule(self.options, self.state)
340 definiens_s = subgen.generate(definiens)
342 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
344 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element content in a ``figure`` environment.

    Returns the opening and closing TeXML ``env`` fragments.
    """
    opening = u"""<env name="figure">"""
    closing = u"</env>"
    return opening, closing
350 def handle_tabela(self, element):
352 for w in element.xpath("wiersz"):
354 if max_col < len(ks):
356 self.options = {'columnts': max_col}
358 # has_frames = int(element.attrib.get("ramki", "0"))
359 # if has_frames: frames_c = "framed"
360 # else: frames_c = ""
361 # return u"""<table class="%s">""" % frames_c, u"</table>"
363 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
364 ''' % ('l' * max_col), \
365 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Terminate a ``wiersz`` element with a TeXML backslash control.

    Nothing is emitted before the content; the suffix is a ``ctrl``
    element whose ``ch`` attribute is a single backslash.
    """
    row_end = u'<ctrl ch="\\"/>'
    return (u"", row_end)
372 def handle_kol(self, element):
373 if element.getnext() is not None:
374 return u"", u'<spec cat="align" />'
377 def handle_link(self, element):
378 if element.attrib.get('url'):
379 url = element.attrib.get('url')
380 if url == element.text:
381 return cmd('url')(self, element)
383 return cmd('href', parms=[element.attrib['url']])(self, element)
385 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Register the image named by ``nazwa`` and emit an ``obraz`` command.

    The image is fetched from the format object stored under the ``format``
    option and recorded in its ``attachments`` mapping under
    ``obraz/<name>``, with underscores removed from the name. The returned
    value is whatever the ``obraz`` command wrapper produces when given that
    path as its parameter.
    """
    pdf_format = self.options['format']
    # The name is stripped once here; stripping again would change nothing.
    image_name = element.attrib.get('nazwa', '').strip()
    image = pdf_format.get_image(image_name)
    attachment_path = "obraz/%s" % image_name.replace("_", "")
    pdf_format.attachments[attachment_path] = image
    render = cmd("obraz", parms=[attachment_path])
    return render(self)
395 def handle_video(self, element):
396 url = element.attrib.get('url')
398 print '!! <video> missing url'
400 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
402 print '!! unknown <video> url scheme:', url
405 thumb = IOFile.from_string(urlopen
406 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
407 img_path = "video/%s.jpg" % name.replace("_", "")
408 self.options['format'].attachments[img_path] = thumb
409 canon_url = "https://www.youtube.com/watch?v=%s" % name
410 return cmd("video", parms=[img_path, canon_url])(self)
413 class Exercise(EduModule):
def __init__(self, *args, **kw):
    """Initialise the exercise handler.

    All arguments are forwarded unchanged to the base class initialiser.
    """
    # Question counter; handle_pytanie increments it and handle_cwiczenie
    # resets it. It is set before the base initialiser runs.
    self.question_counter = 0
    super(Exercise, self).__init__(*args, **kw)
418 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
420 def handle_cwiczenie(self, element):
422 'exercise': element.attrib['typ'],
425 self.question_counter = 0
426 self.piece_counter = 0
428 header = etree.Element("parm")
429 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
430 header_cmd.append(header)
431 header.text = u"Zadanie %d." % self.options['exercise_counter']
433 pre = etree.tostring(header_cmd, encoding=unicode)
435 # Add a single <pytanie> tag if it's not there
436 if not element.xpath(".//pytanie"):
437 qpre, qpost = self.handle_pytanie(element)
442 def handle_pytanie(self, element):
443 """This will handle <cwiczenie> element, when there is no <pytanie>
445 self.question_counter += 1
446 self.piece_counter = 0
448 if self.options['teacher'] and element.attrib.get('rozw'):
449 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
452 def handle_punkt(self, element):
453 pre, post = super(Exercise, self).handle_punkt(element)
454 if self.options['teacher'] and element.attrib.get('rozw'):
455 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
458 def solution_header(self):
459 par = etree.Element("cmd", name="par")
460 parm = etree.Element("parm")
461 parm.text = u"Rozwiązanie:"
463 return etree.tostring(par)
465 def explicit_solution(self):
466 if self.options['solution']:
467 par = etree.Element("cmd", name="par")
468 parm = etree.Element("parm")
469 parm.text = self.options['solution']
471 return self.solution_header() + etree.tostring(par)
475 class Wybor(Exercise):
476 def handle_cwiczenie(self, element):
477 pre, post = super(Wybor, self).handle_cwiczenie(element)
478 is_single_choice = True
479 pytania = element.xpath(".//pytanie")
483 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
484 if len(solutions) != 1:
485 is_single_choice = False
487 choices = p.xpath(".//*[@nazwa]")
489 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
490 if len(choices) != len(uniq):
491 is_single_choice = False
494 self.options = {'single': is_single_choice}
497 def handle_punkt(self, element):
498 if self.options['exercise'] and element.attrib.get('nazwa', None):
499 cmd = 'radio' if self.options['single'] else 'checkbox'
500 return u'<cmd name="%s"/>' % cmd, ''
502 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type.

    It adds no output of its own: questions are rendered exactly as the
    base ``Exercise`` renders them.
    """

    def handle_pytanie(self, element):
        """Render a question by delegating to ``Exercise.handle_pytanie``.

        Returns whatever the base implementation returns for *element*.
        """
        # The ``.//punkt/@rozw`` values are not needed here: nothing in this
        # handler reads them, so they are not queried.
        return super(Uporzadkuj, self).handle_pytanie(element)
511 class Przyporzadkuj(Exercise):
512 def handle_lista(self, lista):
513 header = etree.Element("parm")
514 header_cmd = etree.Element("cmd", name="par")
515 header_cmd.append(header)
516 if 'nazwa' in lista.attrib:
517 header.text = u"Kategorie:"
518 elif 'cel' in lista.attrib:
519 header.text = u"Elementy do przyporządkowania:"
521 header.text = u"Lista:"
522 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
523 pre = etree.tostring(header_cmd, encoding=unicode) + pre
527 class Luki(Exercise):
def find_pieces(self, question):
    """Return all ``luka`` descendants of *question*.

    The result is whatever ``question.xpath`` yields for ``.//luka``.
    """
    gap_query = ".//luka"
    return question.xpath(gap_query)
531 def solution(self, piece):
532 piece = deepcopy(piece)
535 return sub.generate(piece)
537 def handle_pytanie(self, element):
538 qpre, qpost = super(Luki, self).handle_pytanie(element)
540 luki = self.find_pieces(element)
542 self.words = u"<env name='itemize'>%s</env>" % (
543 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
def handle_opis(self, element):
    """Append the prepared word list after the ``opis`` content.

    Returns an empty prefix and ``self.words`` as the suffix; ``self.words``
    is built in ``handle_pytanie``.
    """
    word_list = self.words
    return ('', word_list)
550 def handle_luka(self, element):
552 if self.options['teacher']:
553 piece = deepcopy(element)
556 text = sub.generate(piece)
557 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return all ``zastap`` descendants of *question*.

    The result is whatever ``question.xpath`` yields for ``.//zastap``.
    """
    replacement_query = ".//zastap"
    return question.xpath(replacement_query)
def solution(self, piece):
    """Return the ``rozw`` attribute of *piece*, or '' when it is absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the fixed heading text for the list of elements to insert."""
    heading = u"Elementy do wstawienia"
    return heading
571 def handle_zastap(self, element):
572 piece = deepcopy(element)
575 text = sub.generate(piece)
576 if self.options['teacher'] and element.attrib.get('rozw'):
577 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
581 class PrawdaFalsz(Exercise):
582 def handle_punkt(self, element):
583 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
584 if 'rozw' in element.attrib:
585 post += u" [Prawda/Fałsz]"
591 lists = tree.xpath(".//lista")
596 if p.tail is None: p.tail = ''
600 if p.text is None: p.text = ''
606 class EduModulePDFFormat(PDFFormat):
607 style = get_resource('res/styles/edumed/pdf/edumed.sty')
610 substitute_hyphens(self.wldoc.edoc)
611 fix_hanging(self.wldoc.edoc)
613 self.attachments = {}
617 "teacher": self.customization.get('teacher'),
619 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
621 open("/tmp/texml.xml", "w").write(texml)
624 def get_tex_dir(self):
625 temp = super(EduModulePDFFormat, self).get_tex_dir()
626 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
627 for name, iofile in self.attachments.items():
628 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Look up the attachment called *name* on the document source.

    Reads ``self.wldoc.source.attachments[name]``; a missing name raises
    whatever that mapping raises on a failed lookup.
    """
    source_attachments = self.wldoc.source.attachments
    return source_attachments[name]