1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
Creates one big XML document from the book and its children, converts it to
LaTeX with TeXML, then runs the result through XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, ifoption, tag_open_close
22 from librarian import DCNS, get_resource, IOFile
23 from librarian import functions
24 from pdf import PDFFormat, substitute_hyphens, fix_hanging
29 def _wrap(*args, **kw):
30 value = f(*args, **kw)
32 prefix = (u'<TeXML escape="%d">' % (1 if really else 0))
34 if isinstance(value, list):
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
89 def handle_rdf__RDF(self, _):
90 """skip metadata in generation"""
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + (rights_lic and u'<opt>%s</opt>' % rights_lic or '') + \
97 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) + \
101 def get_authors(self, element, which=None):
102 dc = self.options['wldoc'].book_info
104 authors = dc.authors_textbook + \
105 dc.authors_scenario + \
108 authors = getattr(dc, "authors_%s" % which)
109 return u', '.join(author.readable() for author in authors if author)
112 def get_title(self, element):
113 return self.get_dc(element, 'title', True)
116 def get_description(self, element):
117 return self.get_dc(element, 'description', single=True)
119 def handle_utwor(self, element):
122 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
124 \\documentclass[%s]{wl}
125 \\usepackage{style}''' % self.options['customization_str'],
126 self.options['has_cover'] and '\usepackage{makecover}',
127 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
128 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
129 (self.options['morefloats'] == 'none' and
130 u'''\\IfFileExists{morefloats.sty}{
131 \\usepackage{morefloats}
133 u'''\\def\\authors{%s}''' % self.get_authors(element),
134 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
135 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
136 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
137 u'''\\def\\description{%s}''' % self.get_description(element),
139 u'''\\author{\\authors}''',
140 u'''\\title{%s}''' % self.get_title(element),
141 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
142 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
146 return u"".join(filter(None, lines)), u'</TeXML>'
149 def handle_powiesc(self, element):
151 <env name="document">
152 <cmd name="maketitle"/>
153 """, """<cmd name="editorialsection" /></env>"""
156 def handle_texcommand(self, element):
157 cmd = functions.texcommand(element.tag)
158 return u'<TeXML escape="1"><cmd name="%s"><parm>' % cmd, u'</parm></cmd></TeXML>'
162 handle_akap_dialog = \
163 handle_autor_utworu = \
165 handle_didaskalia = \
166 handle_didask_tekst = \
167 handle_dlugi_cytat = \
168 handle_dzielo_nadrzedne = \
169 handle_lista_osoba = \
171 handle_miejsce_czas = \
173 handle_motto_podpis = \
174 handle_naglowek_akt = \
175 handle_naglowek_czesc = \
176 handle_naglowek_listy = \
177 handle_naglowek_osoba = \
178 handle_naglowek_scena = \
179 handle_nazwa_utworu = \
185 handle_poezja_cyt = \
188 handle_sekcja_asterysk = \
189 handle_sekcja_swiatlo = \
190 handle_separator_linia = \
191 handle_slowo_obce = \
193 handle_tytul_dziela = \
194 handle_wyroznienie = \
198 def handle_naglowek_rozdzial(self, element):
199 if not self.options['teacher']:
200 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
201 self.state['mute'] = False
203 self.state['mute'] = True
205 return self.handle_texcommand(element)
206 handle_naglowek_rozdzial.unmuter = True
208 def handle_naglowek_podrozdzial(self, element):
209 self.activity_counter = 0
210 if not self.options['teacher']:
211 if element.text.startswith(u'Dla ucznia'):
212 self.state['mute'] = False
214 elif element.text.startswith(u'Dla nauczyciela'):
215 self.state['mute'] = True
217 elif self.state['mute']:
219 return self.handle_texcommand(element)
220 handle_naglowek_podrozdzial.unmuter = True
222 def handle_uwaga(self, _e):
225 def handle_extra(self, _e):
228 def handle_nbsp(self, _e):
229 return '<spec cat="tilde" />'
231 _handle_strofa = cmd("strofa")
233 def handle_strofa(self, element):
234 self.options = {'strofa': True}
235 return self._handle_strofa(element)
237 def handle_aktywnosc(self, element):
238 self.activity_counter += 1
241 'activity_counter': self.activity_counter,
244 submill = EduModule(self.options, self.state)
246 if element.xpath('opis'):
247 opis = submill.generate(element.xpath('opis')[0])
251 n = element.xpath('wskazowki')
253 wskazowki = submill.generate(n[0])
256 n = element.xpath('pomoce')
259 pomoce = submill.generate(n[0])
263 forma = ''.join(element.xpath('forma/text()'))
265 czas = ''.join(element.xpath('czas/text()'))
267 counter = self.activity_counter
269 if element.getnext().tag == 'aktywnosc' or (len(self.activity_last) and self.activity_last.getnext() == element):
270 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
274 self.activity_last = element
277 <cmd name="noindent" />
279 <cmd name="activityinfo"><parm>
280 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
281 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
282 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
291 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
292 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
294 @ifoption(sub_gen=True)
295 def handle_pomoce(self, _):
296 return "Pomoce: ", ""
298 def handle_czas(self, *_):
301 def handle_forma(self, *_):
304 def handle_lista(self, element, attrs=None):
305 ltype = element.attrib.get('typ', 'punkt')
306 if not element.findall("punkt"):
307 if ltype == 'czytelnia':
308 return 'W przygotowaniu.'
311 if ltype == 'slowniczek':
312 surl = element.attrib.get('src', None)
314 # print '** missing src on <slowniczek>, setting default'
315 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
316 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
317 self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
323 'slowniczek': 'itemize',
324 'czytelnia': 'itemize'
327 return u'<env name="%s">' % listcmd, u'</env>'
329 def handle_punkt(self, element):
330 return '<cmd name="item"/>', ''
332 def handle_cwiczenie(self, element):
333 exercise_handlers = {
335 'uporzadkuj': Uporzadkuj,
338 'przyporzadkuj': Przyporzadkuj,
339 'prawdafalsz': PrawdaFalsz
342 typ = element.attrib['typ']
343 self.exercise_counter += 1
344 if typ not in exercise_handlers:
345 return '(no handler)'
346 self.options = {'exercise_counter': self.exercise_counter}
347 handler = exercise_handlers[typ](self.options, self.state)
348 return handler.generate(element)
    # XXX: This is copied from pyhtml.py (only the return value differs)
    # and should be refactored to remove the code duplication.
352 def handle_definiendum(self, element):
353 nxt = element.getnext()
356 # let's pull definiens from another document
357 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
358 sxml = self.options['slowniczek_xml']
359 assert element.text != ''
360 if "'" in (element.text or ''):
361 defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
363 defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
365 definiens = defloc[0].getnext()
366 if definiens.tag == 'definiens':
367 subgen = EduModule(self.options, self.state)
368 definiens_s = subgen.generate(definiens)
370 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
372 def handle_definiens(self, element):
375 def handle_podpis(self, element):
376 return u"""<env name="figure">""", u"</env>"
378 def handle_tabela(self, element):
380 for w in element.xpath("wiersz"):
382 if max_col < len(ks):
384 self.options = {'columnts': max_col}
386 # has_frames = int(element.attrib.get("ramki", "0"))
387 # if has_frames: frames_c = "framed"
388 # else: frames_c = ""
389 # return u"""<table class="%s">""" % frames_c, u"</table>"
391 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
392 ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
395 def handle_wiersz(self, element):
396 return u"", u'<ctrl ch="\\"/>'
399 def handle_kol(self, element):
400 if element.getnext() is not None:
401 return u"", u'<spec cat="align" />'
404 def handle_link(self, element):
405 if element.attrib.get('url'):
406 url = element.attrib.get('url')
407 if url == element.text:
408 return cmd('url')(self, element)
410 return cmd('href', parms=[element.attrib['url']])(self, element)
412 return cmd('emph')(self, element)
414 def handle_obraz(self, element):
415 frmt = self.options['format']
416 name = element.attrib.get('nazwa', '').strip()
417 image = frmt.get_image(name.strip())
418 name = image.get_filename().rsplit('/', 1)[-1]
419 img_path = "obraz/%s" % name.replace("_", "")
420 frmt.attachments[img_path] = image
421 return cmd("obraz", parms=[img_path])(self)
423 def handle_video(self, element):
424 url = element.attrib.get('url')
426 print '!! <video> missing url'
428 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
430 print '!! unknown <video> url scheme:', url
433 thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
434 img_path = "video/%s.jpg" % name.replace("_", "")
435 self.options['format'].attachments[img_path] = thumb
436 canon_url = "https://www.youtube.com/watch?v=%s" % name
437 return cmd("video", parms=[img_path, canon_url])(self)
440 class Exercise(EduModule):
    def __init__(self, *args, **kw):
        """Initialise the exercise counters and the parent module.

        All positional and keyword arguments are passed unchanged to the
        parent initialiser.
        """
        # The question counter is set before the parent initialiser runs.
        self.question_counter = 0
        super(Exercise, self).__init__(*args, **kw)
        # No piece counter until one is assigned elsewhere in the class.
        self.piece_counter = None
446 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
448 def handle_cwiczenie(self, element):
450 'exercise': element.attrib['typ'],
453 self.question_counter = 0
454 self.piece_counter = 0
456 header = etree.Element("parm")
457 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
458 header_cmd.append(header)
459 header.text = u"Zadanie %d." % self.options['exercise_counter']
461 pre = etree.tostring(header_cmd, encoding=unicode)
463 # Add a single <pytanie> tag if it's not there
464 if not element.xpath(".//pytanie"):
465 qpre, qpost = self.handle_pytanie(element)
470 def handle_pytanie(self, element):
471 """This will handle <cwiczenie> element, when there is no <pytanie>
473 self.question_counter += 1
474 self.piece_counter = 0
476 if self.options['teacher'] and element.attrib.get('rozw'):
477 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
480 def handle_punkt(self, element):
481 pre, post = super(Exercise, self).handle_punkt(element)
482 if self.options['teacher'] and element.attrib.get('rozw'):
483 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
486 def solution_header(self):
487 par = etree.Element("cmd", name="par")
488 parm = etree.Element("parm")
489 parm.text = u"Rozwiązanie:"
491 return etree.tostring(par)
493 def explicit_solution(self):
494 if self.options['solution']:
495 par = etree.Element("cmd", name="par")
496 parm = etree.Element("parm")
497 parm.text = self.options['solution']
499 return self.solution_header() + etree.tostring(par)
502 class Wybor(Exercise):
503 def handle_cwiczenie(self, element):
504 pre, post = super(Wybor, self).handle_cwiczenie(element)
505 is_single_choice = True
506 pytania = element.xpath(".//pytanie")
510 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
511 if len(solutions) != 1:
512 is_single_choice = False
514 choices = p.xpath(".//*[@nazwa]")
517 uniq.add(n.attrib.get('nazwa', ''))
518 if len(choices) != len(uniq):
519 is_single_choice = False
522 self.options = {'single': is_single_choice}
525 def handle_punkt(self, element):
526 if self.options['exercise'] and element.attrib.get('nazwa', None):
527 cmd = 'radio' if self.options['single'] else 'checkbox'
528 return u'<cmd name="%s"/>' % cmd, ''
530 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type.

    It adds nothing to the generic ``Exercise`` behaviour; questions are
    rendered exactly as the base class renders them.
    """

    def handle_pytanie(self, element):
        """Render a question by delegating to ``Exercise.handle_pytanie``.

        The previous version also collected the ``rozw`` attributes of the
        nested ``punkt`` elements into a local that was never read; that
        dead query has been removed.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
539 class Przyporzadkuj(Exercise):
540 def handle_lista(self, lista):
541 header = etree.Element("parm")
542 header_cmd = etree.Element("cmd", name="par")
543 header_cmd.append(header)
544 if 'nazwa' in lista.attrib:
545 header.text = u"Kategorie:"
546 elif 'cel' in lista.attrib:
547 header.text = u"Elementy do przyporządkowania:"
549 header.text = u"Lista:"
550 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
551 pre = etree.tostring(header_cmd, encoding=unicode) + pre
555 class Luki(Exercise):
556 def find_pieces(self, question):
557 return question.xpath(".//luka")
559 def solution(self, piece):
560 piece = deepcopy(piece)
563 return sub.generate(piece)
565 def handle_pytanie(self, element):
566 qpre, qpost = super(Luki, self).handle_pytanie(element)
568 luki = self.find_pieces(element)
570 self.words = u"<env name='itemize'>%s</env>" % (
571 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
575 def handle_opis(self, element):
576 return '', self.words
578 def handle_luka(self, element):
580 if self.options['teacher']:
581 piece = deepcopy(element)
584 text = sub.generate(piece)
585 luka += u" [rozwiązanie: %s]" % text
590 def find_pieces(self, question):
591 return question.xpath(".//zastap")
593 def solution(self, piece):
594 return piece.attrib.get('rozw', '')
596 def list_header(self):
597 return u"Elementy do wstawienia"
599 def handle_zastap(self, element):
600 piece = deepcopy(element)
603 text = sub.generate(piece)
604 if self.options['teacher'] and element.attrib.get('rozw'):
605 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
609 class PrawdaFalsz(Exercise):
610 def handle_punkt(self, element):
611 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
612 if 'rozw' in element.attrib:
613 post += u" [Prawda/Fałsz]"
618 lists = tree.xpath(".//lista")
635 class EduModulePDFFormat(PDFFormat):
636 style = get_resource('res/styles/edumed/pdf/edumed.sty')
639 substitute_hyphens(self.wldoc.edoc)
640 fix_hanging(self.wldoc.edoc)
642 self.attachments = {}
646 "teacher": self.customization.get('teacher'),
648 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
650 open("/tmp/texml.xml", "w").write(texml)
653 def get_tex_dir(self):
654 temp = super(EduModulePDFFormat, self).get_tex_dir()
655 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
656 for name, iofile in self.attachments.items():
657 iofile.save_as(os.path.join(temp, name))
660 def get_image(self, name):
661 return self.wldoc.source.attachments[name]