1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, ifoption, tag_open_close
22 from librarian import DCNS, get_resource, IOFile
23 from librarian import functions
24 from pdf import PDFFormat, substitute_hyphens, fix_hanging
29 def _wrap(*args, **kw):
30 value = f(*args, **kw)
32 prefix = (u'<TeXML escape="%d">' % (1 if really else 0))
34 if isinstance(value, list):
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
86 return values[0] if len(values) else ''
89 def handle_rdf__RDF(self, _):
90 """skip metadata in generation"""
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + (rights_lic and u'<opt>%s</opt>' % rights_lic or '') + \
97 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) + \
101 def get_authors(self, element, which=None):
102 dc = self.options['wldoc'].book_info
104 authors = dc.authors_textbook + \
105 dc.authors_scenario + \
108 authors = getattr(dc, "authors_%s" % which)
109 return u', '.join(author.readable() for author in authors if author)
def get_title(self, element):
    """Return the Dublin Core ``title`` value for *element*.

    Delegates to ``get_dc`` and asks it for a single value.
    """
    title = self.get_dc(element, 'title', single=True)
    return title
116 def get_description(self, element):
117 desc = self.get_dc(element, 'description', single=True)
119 print '!! no descripton'
122 def handle_utwor(self, element):
125 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
127 \\documentclass[%s]{wl}
128 \\usepackage{style}''' % self.options['customization_str'],
129 self.options['has_cover'] and '\usepackage{makecover}',
130 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
131 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
132 (self.options['morefloats'] == 'none' and
133 u'''\\IfFileExists{morefloats.sty}{
134 \\usepackage{morefloats}
136 u'''\\def\\authors{%s}''' % self.get_authors(element),
137 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
138 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
139 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
140 u'''\\def\\description{%s}''' % self.get_description(element),
142 u'''\\author{\\authors}''',
143 u'''\\title{%s}''' % self.get_title(element),
144 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
145 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
149 return u"".join(filter(None, lines)), u'</TeXML>'
152 def handle_powiesc(self, element):
154 <env name="document">
155 <cmd name="maketitle"/>
156 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element in the TeX command derived from its tag.

    The command name is whatever ``functions.texcommand`` returns for
    ``element.tag``.  Returns an ``(opening, closing)`` pair of TeXML
    markup strings.
    """
    # Named ``tex_name`` so the module-level ``cmd`` helper is not shadowed.
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
165 handle_akap_dialog = \
166 handle_autor_utworu = \
168 handle_didaskalia = \
169 handle_didask_tekst = \
170 handle_dlugi_cytat = \
171 handle_dzielo_nadrzedne = \
172 handle_lista_osoba = \
174 handle_miejsce_czas = \
176 handle_motto_podpis = \
177 handle_naglowek_akt = \
178 handle_naglowek_czesc = \
179 handle_naglowek_listy = \
180 handle_naglowek_osoba = \
181 handle_naglowek_scena = \
182 handle_nazwa_utworu = \
188 handle_poezja_cyt = \
191 handle_sekcja_asterysk = \
192 handle_sekcja_swiatlo = \
193 handle_separator_linia = \
194 handle_slowo_obce = \
196 handle_tytul_dziela = \
197 handle_wyroznienie = \
201 def handle_naglowek_rozdzial(self, element):
202 if not self.options['teacher']:
203 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
204 self.state['mute'] = False
206 self.state['mute'] = True
208 return self.handle_texcommand(element)
209 handle_naglowek_rozdzial.unmuter = True
211 def handle_naglowek_podrozdzial(self, element):
212 self.activity_counter = 0
213 if not self.options['teacher']:
214 if element.text.startswith(u'Dla ucznia'):
215 self.state['mute'] = False
217 elif element.text.startswith(u'Dla nauczyciela'):
218 self.state['mute'] = True
220 elif self.state['mute']:
222 return self.handle_texcommand(element)
223 handle_naglowek_podrozdzial.unmuter = True
225 def handle_uwaga(self, _e):
228 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Return the TeXML ``spec`` markup of category ``tilde``.

    The element argument is ignored.
    """
    tilde_markup = '<spec cat="tilde" />'
    return tilde_markup
234 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Flag the ``strofa`` option, then render via ``_handle_strofa``.

    The option is set before rendering, so ``_handle_strofa`` already
    sees it.
    """
    # NOTE(review): presumably ``options`` is a property on the base class
    # that merges this dict into the current options rather than replacing
    # them -- confirm against Xmill.
    self.options = {'strofa': True}
    render = self._handle_strofa
    return render(element)
240 def handle_aktywnosc(self, element):
241 self.activity_counter += 1
244 'activity_counter': self.activity_counter,
247 submill = EduModule(self.options, self.state)
249 if element.xpath('opis'):
250 opis = submill.generate(element.xpath('opis')[0])
254 n = element.xpath('wskazowki')
256 wskazowki = submill.generate(n[0])
259 n = element.xpath('pomoce')
262 pomoce = submill.generate(n[0])
266 forma = ''.join(element.xpath('forma/text()'))
268 czas = ''.join(element.xpath('czas/text()'))
270 counter = self.activity_counter
272 if element.getnext().tag == 'aktywnosc' or (len(self.activity_last) and self.activity_last.getnext() == element):
273 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
277 self.activity_last = element
280 <cmd name="noindent" />
282 <cmd name="activityinfo"><parm>
283 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
284 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
285 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
294 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
295 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Put the "Pomoce: " label in front of the element's content.

    Returns a ``(before, after)`` pair; nothing is added after the content.
    """
    # NOTE(review): presumably ``ifoption(sub_gen=True)`` makes this handler
    # active only while the ``sub_gen`` option is set -- confirm in xmlutils.
    label, trailer = "Pomoce: ", ""
    return label, trailer
301 def handle_czas(self, *_):
304 def handle_forma(self, *_):
307 def handle_lista(self, element, attrs=None):
308 ltype = element.attrib.get('typ', 'punkt')
309 if not element.findall("punkt"):
310 if ltype == 'czytelnia':
311 return 'W przygotowaniu.'
314 if ltype == 'slowniczek':
315 surl = element.attrib.get('src', None)
317 # print '** missing src on <slowniczek>, setting default'
318 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
319 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
320 self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
326 'slowniczek': 'itemize',
327 'czytelnia': 'itemize'
330 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Return the ``(before, after)`` markup pair for a list item.

    An ``item`` command goes before the content; nothing goes after it.
    """
    item_cmd = '<cmd name="item"/>'
    return item_cmd, ''
335 def handle_cwiczenie(self, element):
336 exercise_handlers = {
338 'uporzadkuj': Uporzadkuj,
341 'przyporzadkuj': Przyporzadkuj,
342 'prawdafalsz': PrawdaFalsz
345 typ = element.attrib['typ']
346 self.exercise_counter += 1
347 if typ not in exercise_handlers:
348 return '(no handler)'
349 self.options = {'exercise_counter': self.exercise_counter}
350 handler = exercise_handlers[typ](self.options, self.state)
351 return handler.generate(element)
353 # XXX this is copied from pyhtml.py, except for return and
354 # should be refactored for no code duplication
355 def handle_definiendum(self, element):
356 nxt = element.getnext()
359 # let's pull definiens from another document
360 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
361 sxml = self.options['slowniczek_xml']
362 assert element.text != ''
363 if "'" in (element.text or ''):
364 defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
366 defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
368 definiens = defloc[0].getnext()
369 if definiens.tag == 'definiens':
370 subgen = EduModule(self.options, self.state)
371 definiens_s = subgen.generate(definiens)
373 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
375 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a ``figure`` environment.

    Returns the opening and closing TeXML ``env`` markup as a pair.
    """
    env_open = u'<env name="figure">'
    env_close = u'</env>'
    return env_open, env_close
381 def handle_tabela(self, element):
383 for w in element.xpath("wiersz"):
385 if max_col < len(ks):
387 self.options = {'columnts': max_col}
389 # has_frames = int(element.attrib.get("ramki", "0"))
390 # if has_frames: frames_c = "framed"
391 # else: frames_c = ""
392 # return u"""<table class="%s">""" % frames_c, u"</table>"
394 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
395 ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Return the ``(before, after)`` markup pair for a table row.

    Nothing is emitted before the row; a ``ctrl`` element with a backslash
    character is emitted after it.
    """
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
402 def handle_kol(self, element):
403 if element.getnext() is not None:
404 return u"", u'<spec cat="align" />'
407 def handle_link(self, element):
408 if element.attrib.get('url'):
409 url = element.attrib.get('url')
410 if url == element.text:
411 return cmd('url')(self, element)
413 return cmd('href', parms=[element.attrib['url']])(self, element)
415 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Register the referenced image as an attachment and emit ``obraz``.

    The image name is read from the element's ``nazwa`` attribute (with
    surrounding whitespace stripped) and looked up through the format
    object stored in ``self.options['format']``.  The image is stored in
    that object's ``attachments`` under ``obraz/<file name>``, where the
    file name is the last ``/``-separated component of the image's
    filename with underscores removed.  Returns the result of the
    ``obraz`` command built with that path as its parameter.
    """
    output_format = self.options['format']
    requested = element.attrib.get('nazwa', '').strip()
    image = output_format.get_image(requested)

    base_name = image.get_filename().rsplit('/', 1)[-1]
    attachment_path = "obraz/%s" % base_name.replace("_", "")
    output_format.attachments[attachment_path] = image

    render = cmd("obraz", parms=[attachment_path])
    return render(self)
426 def handle_video(self, element):
427 url = element.attrib.get('url')
429 print '!! <video> missing url'
431 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
433 print '!! unknown <video> url scheme:', url
436 thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
437 img_path = "video/%s.jpg" % name.replace("_", "")
438 self.options['format'].attachments[img_path] = thumb
439 canon_url = "https://www.youtube.com/watch?v=%s" % name
440 return cmd("video", parms=[img_path, canon_url])(self)
443 class Exercise(EduModule):
444 def __init__(self, *args, **kw):
445 self.question_counter = 0
446 super(Exercise, self).__init__(*args, **kw)
447 self.piece_counter = None
449 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
451 def handle_cwiczenie(self, element):
453 'exercise': element.attrib['typ'],
456 self.question_counter = 0
457 self.piece_counter = 0
459 header = etree.Element("parm")
460 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
461 header_cmd.append(header)
462 header.text = u"Zadanie %d." % self.options['exercise_counter']
464 pre = etree.tostring(header_cmd, encoding=unicode)
466 # Add a single <pytanie> tag if it's not there
467 if not element.xpath(".//pytanie"):
468 qpre, qpost = self.handle_pytanie(element)
473 def handle_pytanie(self, element):
474 """This will handle <cwiczenie> element, when there is no <pytanie>
476 self.question_counter += 1
477 self.piece_counter = 0
479 if self.options['teacher'] and element.attrib.get('rozw'):
480 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
483 def handle_punkt(self, element):
484 pre, post = super(Exercise, self).handle_punkt(element)
485 if self.options['teacher'] and element.attrib.get('rozw'):
486 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
489 def solution_header(self):
490 par = etree.Element("cmd", name="par")
491 parm = etree.Element("parm")
492 parm.text = u"Rozwiązanie:"
494 return etree.tostring(par)
496 def explicit_solution(self):
497 if self.options['solution']:
498 par = etree.Element("cmd", name="par")
499 parm = etree.Element("parm")
500 parm.text = self.options['solution']
502 return self.solution_header() + etree.tostring(par)
505 class Wybor(Exercise):
506 def handle_cwiczenie(self, element):
507 pre, post = super(Wybor, self).handle_cwiczenie(element)
508 is_single_choice = True
509 pytania = element.xpath(".//pytanie")
513 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
514 if len(solutions) != 1:
515 is_single_choice = False
517 choices = p.xpath(".//*[@nazwa]")
520 uniq.add(n.attrib.get('nazwa', ''))
521 if len(choices) != len(uniq):
522 is_single_choice = False
525 self.options = {'single': is_single_choice}
528 def handle_punkt(self, element):
529 if self.options['exercise'] and element.attrib.get('nazwa', None):
530 cmd = 'radio' if self.options['single'] else 'checkbox'
531 return u'<cmd name="%s"/>' % cmd, ''
533 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type.

    Questions are rendered exactly as in the base ``Exercise`` class.
    """

    def handle_pytanie(self, element):
        """Render a question by delegating to ``Exercise.handle_pytanie``.

        The previous version also evaluated ``.//punkt/@rozw`` into a local
        that was never read; that dead query has been removed.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
542 class Przyporzadkuj(Exercise):
543 def handle_lista(self, lista):
544 header = etree.Element("parm")
545 header_cmd = etree.Element("cmd", name="par")
546 header_cmd.append(header)
547 if 'nazwa' in lista.attrib:
548 header.text = u"Kategorie:"
549 elif 'cel' in lista.attrib:
550 header.text = u"Elementy do przyporządkowania:"
552 header.text = u"Lista:"
553 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
554 pre = etree.tostring(header_cmd, encoding=unicode) + pre
558 class Luki(Exercise):
def find_pieces(self, question):
    """Return the result of querying *question* for ``luka`` descendants."""
    gap_query = ".//luka"
    return question.xpath(gap_query)
562 def solution(self, piece):
563 piece = deepcopy(piece)
566 return sub.generate(piece)
568 def handle_pytanie(self, element):
569 qpre, qpost = super(Luki, self).handle_pytanie(element)
571 luki = self.find_pieces(element)
573 self.words = u"<env name='itemize'>%s</env>" % (
574 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
578 def handle_opis(self, element):
579 return '', self.words
581 def handle_luka(self, element):
583 if self.options['teacher']:
584 piece = deepcopy(element)
587 text = sub.generate(piece)
588 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return the result of querying *question* for ``zastap`` descendants."""
    replacement_query = ".//zastap"
    return question.xpath(replacement_query)
def solution(self, piece):
    """Return the piece's ``rozw`` attribute, or ``''`` when it is absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the fixed header text for the list of items to insert."""
    header_text = u"Elementy do wstawienia"
    return header_text
602 def handle_zastap(self, element):
603 piece = deepcopy(element)
606 text = sub.generate(piece)
607 if self.options['teacher'] and element.attrib.get('rozw'):
608 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
612 class PrawdaFalsz(Exercise):
613 def handle_punkt(self, element):
614 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
615 if 'rozw' in element.attrib:
616 post += u" [Prawda/Fałsz]"
621 lists = tree.xpath(".//lista")
638 class EduModulePDFFormat(PDFFormat):
639 style = get_resource('res/styles/edumed/pdf/edumed.sty')
642 substitute_hyphens(self.wldoc.edoc)
643 fix_hanging(self.wldoc.edoc)
645 self.attachments = {}
649 "teacher": self.customization.get('teacher'),
651 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
653 open("/tmp/texml.xml", "w").write(texml)
656 def get_tex_dir(self):
657 temp = super(EduModulePDFFormat, self).get_tex_dir()
658 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
659 for name, iofile in self.attachments.items():
660 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Look up *name* in the source document's attachments.

    Indexes ``self.wldoc.source.attachments`` with *name* and returns the
    result; a missing name propagates whatever error that lookup raises.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]