1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it through XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
def handle_rdf__RDF(self, _):
    """Skip metadata in generation.

    The element argument is ignored and nothing is returned.
    """
    return None
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + \
97 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
98 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
102 def get_authors(self, element, which=None):
103 dc = self.options['wldoc'].book_info
105 authors = dc.authors_textbook + \
106 dc.authors_scenario + \
109 authors = getattr(dc, "authors_%s" % which)
110 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the ``title`` metadata value looked up through ``get_dc``.

    ``get_dc`` is called with its third argument set to ``True`` (the
    ``single`` flag), and its result is returned unchanged.
    """
    title = self.get_dc(element, 'title', True)
    return title
116 def handle_utwor(self, element):
119 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
121 \\documentclass[%s]{wl}
122 \\usepackage{style}''' % self.options['customization_str'],
123 self.options['has_cover'] and '\usepackage{makecover}',
124 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
125 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
126 (self.options['morefloats'] == 'none' and
127 u'''\\IfFileExists{morefloats.sty}{
128 \\usepackage{morefloats}
130 u'''\\def\\authors{%s}''' % self.get_authors(element),
131 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
132 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
133 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
135 u'''\\author{\\authors}''',
136 u'''\\title{%s}''' % self.get_title(element),
137 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
138 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
141 return u"".join(filter(None, lines)), u'</TeXML>'
145 def handle_powiesc(self, element):
147 <env name="document">
148 <cmd name="maketitle"/>
149 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap an element in the TeX command derived from its tag.

    The command name is ``functions.texcommand(element.tag)``.

    Returns a ``(prefix, suffix)`` pair of TeXML markup: the prefix opens
    the command and its ``<parm>``, the suffix closes both.
    """
    command_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
160 handle_akap_dialog = \
161 handle_akap_dialog = \
162 handle_autor_utworu = \
164 handle_didaskalia = \
165 handle_didask_tekst = \
166 handle_dlugi_cytat = \
167 handle_dzielo_nadrzedne = \
168 handle_lista_osoba = \
170 handle_miejsce_czas = \
172 handle_motto_podpis = \
173 handle_naglowek_akt = \
174 handle_naglowek_czesc = \
175 handle_naglowek_listy = \
176 handle_naglowek_osoba = \
177 handle_naglowek_podrozdzial = \
178 handle_naglowek_scena = \
179 handle_nazwa_utworu = \
185 handle_poezja_cyt = \
188 handle_sekcja_asterysk = \
189 handle_sekcja_swiatlo = \
190 handle_separator_linia = \
191 handle_slowo_obce = \
193 handle_tytul_dziela = \
194 handle_wyroznienie = \
198 def handle_naglowek_rozdzial(self, element):
199 if not self.options['teacher']:
200 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek')):
201 self.state['mute'] = False
203 self.state['mute'] = True
205 return self.handle_texcommand(element)
206 handle_naglowek_rozdzial.unmuter = True
209 def handle_uwaga(self, _e):
211 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Return the TeXML tilde ``spec`` emitted for an ``nbsp`` element."""
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
217 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Handle a ``strofa`` element.

    Assigns ``{'strofa': True}`` to ``self.options`` and then returns
    whatever ``self._handle_strofa(element)`` produces.
    """
    strofa_flag = {'strofa': True}
    self.options = strofa_flag
    rendered = self._handle_strofa(element)
    return rendered
223 def handle_aktywnosc(self, element):
224 self.activity_counter += 1
227 'activity_counter': self.activity_counter,
230 submill = EduModule(self.options, self.state)
232 if element.xpath('opis'):
233 opis = submill.generate(element.xpath('opis')[0])
237 n = element.xpath('wskazowki')
238 if n: wskazowki = submill.generate(n[0])
241 n = element.xpath('pomoce')
243 if n: pomoce = submill.generate(n[0])
246 forma = ''.join(element.xpath('forma/text()'))
248 czas = ''.join(element.xpath('czas/text()'))
250 counter = self.activity_counter
252 if element.getnext().tag == 'aktywnosc' or self.activity_last.getnext() == element:
253 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
257 self.activity_last = element
260 <cmd name="noindent" />
262 <cmd name="activityinfo"><parm>
263 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
264 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
265 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
# Both handlers return an empty (prefix, suffix) pair, so <opis> and
# <wskazowki> add no markup of their own; each is wrapped in
# ifoption(sub_gen=True).
# NOTE(review): presumably ifoption limits them to sub-generation runs --
# confirm against xmlutils.ifoption.
handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Return ``(prefix, suffix)`` for ``pomoce``: the label and no suffix."""
    label, closing = "Pomoce: ", ""
    return label, closing
281 def handle_czas(self, *_):
284 def handle_forma(self, *_):
287 def handle_lista(self, element, attrs={}):
288 ltype = element.attrib.get('typ', 'punkt')
289 if not element.findall("punkt"):
290 if ltype == 'czytelnia':
291 return 'W przygotowaniu.'
294 if ltype == 'slowniczek':
295 surl = element.attrib.get('src', None)
297 # print '** missing src on <slowniczek>, setting default'
298 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
301 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
302 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
304 listcmd = {'num': 'enumerate',
307 'slowniczek': 'itemize',
308 'czytelnia': 'itemize'}[ltype]
310 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Return ``(prefix, suffix)`` for a list item.

    The prefix is an ``item`` command; the suffix is empty.
    """
    item_command = '<cmd name="item"/>'
    return item_command, ''
315 def handle_cwiczenie(self, element):
316 exercise_handlers = {
318 'uporzadkuj': Uporzadkuj,
321 'przyporzadkuj': Przyporzadkuj,
322 'prawdafalsz': PrawdaFalsz
325 typ = element.attrib['typ']
326 self.exercise_counter += 1
327 if not typ in exercise_handlers:
328 return '(no handler)'
329 self.options = {'exercise_counter': self.exercise_counter}
330 handler = exercise_handlers[typ](self.options, self.state)
331 return handler.generate(element)
# XXX: this is copied from pyhtml.py, except for the return statement, and
# should be refactored to remove the code duplication.
335 def handle_definiendum(self, element):
336 nxt = element.getnext()
339 # let's pull definiens from another document
340 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
341 sxml = self.options['slowniczek_xml']
342 assert element.text != ''
343 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
345 definiens = defloc[0].getnext()
346 if definiens.tag == 'definiens':
347 subgen = EduModule(self.options, self.state)
348 definiens_s = subgen.generate(definiens)
350 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
352 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Return ``(prefix, suffix)`` wrapping the element in a ``figure`` env."""
    figure_open = u'<env name="figure">'
    figure_close = u"</env>"
    return figure_open, figure_close
358 def handle_tabela(self, element):
360 for w in element.xpath("wiersz"):
362 if max_col < len(ks):
364 self.options = {'columnts': max_col}
366 # has_frames = int(element.attrib.get("ramki", "0"))
367 # if has_frames: frames_c = "framed"
368 # else: frames_c = ""
369 # return u"""<table class="%s">""" % frames_c, u"</table>"
371 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
372 ''' % ('l' * max_col), \
373 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Return ``(prefix, suffix)`` for a table row.

    The prefix is empty; the suffix is a ``ctrl`` element whose ``ch``
    attribute is a single backslash.
    """
    row_terminator = u'<ctrl ch="\\"/>'
    return u"", row_terminator
380 def handle_kol(self, element):
381 if element.getnext() is not None:
382 return u"", u'<spec cat="align" />'
def handle_link(self, element):
    """Render a link element through the matching TeXML command.

    * no (or empty) ``url`` attribute: rendered with ``emph``;
    * ``url`` equal to the element text: rendered with ``url``;
    * otherwise: rendered with ``href``, with the URL passed as a parameter.

    Returns whatever the selected ``cmd(...)`` wrapper returns for the
    element.
    """
    url = element.attrib.get('url')
    if not url:
        return cmd('emph')(self, element)
    if url == element.text:
        return cmd('url')(self, element)
    return cmd('href', parms=[url])(self, element)
def handle_obraz(self, element):
    """Register the named image as an attachment and emit an ``obraz`` command.

    The image is fetched with ``get_image`` on the object stored under
    ``self.options['format']``, using the stripped ``nazwa`` attribute. It
    is stored in that object's ``attachments`` under ``obraz/<name>``, with
    underscores removed from the name. The ``obraz`` command receives that
    path as its only parameter and is invoked without an element.
    """
    output_format = self.options['format']
    image_name = element.attrib.get('nazwa', '').strip()
    # Underscores are dropped from the attachment path only; the lookup
    # itself uses the name as written in the document.
    attachment_path = "obraz/%s" % image_name.replace("_", "")
    output_format.attachments[attachment_path] = output_format.get_image(image_name)
    return cmd("obraz", parms=[attachment_path])(self)
403 def handle_video(self, element):
404 url = element.attrib.get('url')
406 print '!! <video> missing url'
408 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
410 print '!! unknown <video> url scheme:', url
413 thumb = IOFile.from_string(urlopen
414 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
415 img_path = "video/%s.jpg" % name.replace("_", "")
416 self.options['format'].attachments[img_path] = thumb
417 canon_url = "https://www.youtube.com/watch?v=%s" % name
418 return cmd("video", parms=[img_path, canon_url])(self)
421 class Exercise(EduModule):
def __init__(self, *positional, **named):
    """Reset the question counter, then run the base-class initialiser.

    All positional and keyword arguments are forwarded unchanged.
    """
    # The counter is assigned before the base initialiser runs.
    self.question_counter = 0
    super(Exercise, self).__init__(*positional, **named)
# <rozw_kom> is rendered with the 'akap' command, wrapped in
# ifoption(teacher=True).
# NOTE(review): presumably this restricts it to teacher output -- confirm
# against xmlutils.ifoption.
handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
428 def handle_cwiczenie(self, element):
430 'exercise': element.attrib['typ'],
433 self.question_counter = 0
434 self.piece_counter = 0
436 header = etree.Element("parm")
437 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
438 header_cmd.append(header)
439 header.text = u"Zadanie %d." % self.options['exercise_counter']
441 pre = etree.tostring(header_cmd, encoding=unicode)
443 # Add a single <pytanie> tag if it's not there
444 if not element.xpath(".//pytanie"):
445 qpre, qpost = self.handle_pytanie(element)
450 def handle_pytanie(self, element):
451 """This will handle <cwiczenie> element, when there is no <pytanie>
453 self.question_counter += 1
454 self.piece_counter = 0
456 if self.options['teacher'] and element.attrib.get('rozw'):
457 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
460 def handle_punkt(self, element):
461 pre, post = super(Exercise, self).handle_punkt(element)
462 if self.options['teacher'] and element.attrib.get('rozw'):
463 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
466 def solution_header(self):
467 par = etree.Element("cmd", name="par")
468 parm = etree.Element("parm")
469 parm.text = u"Rozwiązanie:"
471 return etree.tostring(par)
473 def explicit_solution(self):
474 if self.options['solution']:
475 par = etree.Element("cmd", name="par")
476 parm = etree.Element("parm")
477 parm.text = self.options['solution']
479 return self.solution_header() + etree.tostring(par)
483 class Wybor(Exercise):
484 def handle_cwiczenie(self, element):
485 pre, post = super(Wybor, self).handle_cwiczenie(element)
486 is_single_choice = True
487 pytania = element.xpath(".//pytanie")
491 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
492 if len(solutions) != 1:
493 is_single_choice = False
495 choices = p.xpath(".//*[@nazwa]")
497 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
498 if len(choices) != len(uniq):
499 is_single_choice = False
502 self.options = {'single': is_single_choice}
def handle_punkt(self, element):
    """Render a choice item as a radio button or checkbox.

    When ``self.options['exercise']`` is truthy and the element has a
    non-empty ``nazwa`` attribute, the prefix is a ``radio`` command if
    ``self.options['single']`` is truthy and a ``checkbox`` command
    otherwise; the suffix is empty. Any other item is delegated to the
    parent class.
    """
    is_named_choice = self.options['exercise'] and element.attrib.get('nazwa', None)
    if not is_named_choice:
        return super(Wybor, self).handle_punkt(element)
    if self.options['single']:
        widget = 'radio'
    else:
        widget = 'checkbox'
    return u'<cmd name="%s"/>' % widget, ''
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type."""

    def handle_pytanie(self, element):
        """Render a question exactly as the parent ``Exercise`` does.

        Returns whatever ``Exercise.handle_pytanie`` returns for ``element``.
        """
        # No ordering-specific markup is produced here, so the ``rozw``
        # attributes of nested ``punkt`` elements are not collected.
        return super(Uporzadkuj, self).handle_pytanie(element)
519 class Przyporzadkuj(Exercise):
520 def handle_lista(self, lista):
521 header = etree.Element("parm")
522 header_cmd = etree.Element("cmd", name="par")
523 header_cmd.append(header)
524 if 'nazwa' in lista.attrib:
525 header.text = u"Kategorie:"
526 elif 'cel' in lista.attrib:
527 header.text = u"Elementy do przyporządkowania:"
529 header.text = u"Lista:"
530 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
531 pre = etree.tostring(header_cmd, encoding=unicode) + pre
535 class Luki(Exercise):
def find_pieces(self, question):
    """Return the result of the XPath query ``.//luka`` on ``question``."""
    gap_query = ".//luka"
    return question.xpath(gap_query)
539 def solution(self, piece):
540 piece = deepcopy(piece)
543 return sub.generate(piece)
545 def handle_pytanie(self, element):
546 qpre, qpost = super(Luki, self).handle_pytanie(element)
548 luki = self.find_pieces(element)
550 self.words = u"<env name='itemize'>%s</env>" % (
551 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
def handle_opis(self, element):
    """Return an empty prefix and ``self.words`` as the suffix for ``opis``."""
    suffix = self.words
    return '', suffix
558 def handle_luka(self, element):
560 if self.options['teacher']:
561 piece = deepcopy(element)
564 text = sub.generate(piece)
565 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return the result of the XPath query ``.//zastap`` on ``question``."""
    replacement_query = ".//zastap"
    return question.xpath(replacement_query)
def solution(self, piece):
    """Return the ``rozw`` attribute of ``piece``, or ``''`` when absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the fixed header text for the list of pieces."""
    header_text = u"Elementy do wstawienia"
    return header_text
579 def handle_zastap(self, element):
580 piece = deepcopy(element)
583 text = sub.generate(piece)
584 if self.options['teacher'] and element.attrib.get('rozw'):
585 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
589 class PrawdaFalsz(Exercise):
590 def handle_punkt(self, element):
591 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
592 if 'rozw' in element.attrib:
593 post += u" [Prawda/Fałsz]"
599 lists = tree.xpath(".//lista")
604 if p.tail is None: p.tail = ''
608 if p.text is None: p.text = ''
614 class EduModulePDFFormat(PDFFormat):
615 style = get_resource('res/styles/edumed/pdf/edumed.sty')
618 substitute_hyphens(self.wldoc.edoc)
619 fix_hanging(self.wldoc.edoc)
621 self.attachments = {}
625 "teacher": self.customization.get('teacher'),
627 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
629 open("/tmp/texml.xml", "w").write(texml)
632 def get_tex_dir(self):
633 temp = super(EduModulePDFFormat, self).get_tex_dir()
634 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
635 for name, iofile in self.attachments.items():
636 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the attachment stored under ``name`` in the source document.

    Looks the name up in ``self.wldoc.source.attachments``; a missing name
    propagates whatever that mapping raises.
    """
    source_attachments = self.wldoc.source.attachments
    return source_attachments[name]