1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None):
70 super(EduModule, self).__init__(options)
71 self.activity_counter = 0
72 self.exercise_counter = 0
74 def swap_endlines(txt):
75 if self.options['strofa']:
76 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
78 self.register_text_filter(swap_endlines)
79 self.register_text_filter(functions.substitute_entities)
80 self.register_text_filter(mark_alien_characters)
82 def get_dc(self, element, dc_field, single=False):
83 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
88 def handle_rdf__RDF(self, _):
89 "skip metadata in generation"
93 def get_rightsinfo(self, element):
94 rights_lic = self.get_dc(element, 'rights.license', True)
95 return u'<cmd name="rightsinfostr">' + \
96 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
97 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
101 def get_authors(self, element, which=None):
102 dc = self.options['wldoc'].book_info
104 authors = dc.authors_textbook + \
105 dc.authors_scenario + \
108 authors = getattr(dc, "authors_%s" % which)
109 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the ``dc:title`` value for *element*.

    Delegates to ``get_dc`` with ``single=True`` and returns whatever
    that call yields.
    """
    title = self.get_dc(element, 'title', single=True)
    return title
115 def handle_utwor(self, element):
118 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
120 \\documentclass[%s]{wl}
121 \\usepackage{style}''' % self.options['customization_str'],
122 self.options['has_cover'] and '\usepackage{makecover}',
123 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
124 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
125 (self.options['morefloats'] == 'none' and
126 u'''\\IfFileExists{morefloats.sty}{
127 \\usepackage{morefloats}
129 u'''\\def\\authors{%s}''' % self.get_authors(element),
130 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
131 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
132 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
134 u'''\\author{\\authors}''',
135 u'''\\title{%s}''' % self.get_title(element),
136 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
137 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
140 return u"".join(filter(None, lines)), u'</TeXML>'
144 def handle_powiesc(self, element):
146 <env name="document">
147 <cmd name="maketitle"/>
148 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element's content in a TeXML command.

    The command name is obtained from ``functions.texcommand`` applied
    to the element's tag.  Returns an ``(opening, closing)`` pair of
    TeXML markup strings.
    """
    # Local deliberately not called ``cmd`` so that it does not shadow the
    # module-level ``cmd`` helper.
    command_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
159 handle_akap_dialog = \
160 handle_akap_dialog = \
161 handle_autor_utworu = \
163 handle_didaskalia = \
164 handle_didask_tekst = \
165 handle_dlugi_cytat = \
166 handle_dzielo_nadrzedne = \
167 handle_lista_osoba = \
169 handle_miejsce_czas = \
171 handle_motto_podpis = \
172 handle_naglowek_akt = \
173 handle_naglowek_czesc = \
174 handle_naglowek_listy = \
175 handle_naglowek_osoba = \
176 handle_naglowek_podrozdzial = \
177 handle_naglowek_podrozdzial = \
178 handle_naglowek_rozdzial = \
179 handle_naglowek_rozdzial = \
180 handle_naglowek_scena = \
181 handle_nazwa_utworu = \
187 handle_poezja_cyt = \
190 handle_sekcja_asterysk = \
191 handle_sekcja_swiatlo = \
192 handle_separator_linia = \
193 handle_slowo_obce = \
195 handle_tytul_dziela = \
196 handle_wyroznienie = \
200 def handle_uwaga(self, _e):
202 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Return the TeXML ``spec`` markup of category ``tilde``.

    The element argument is ignored; a single string (not a pair) is
    returned.
    """
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
208 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Flag stanza mode in the options and emit the ``strofa`` command.

    Assigns ``{'strofa': True}`` to ``self.options`` and then delegates
    to ``self._handle_strofa``.

    NOTE(review): presumably the ``options`` setter merges or stacks
    this dict rather than replacing all options -- confirm in Xmill.
    """
    stanza_flag = {'strofa': True}
    self.options = stanza_flag
    return self._handle_strofa(element)
214 def handle_aktywnosc(self, element):
215 self.activity_counter += 1
218 'activity_counter': self.activity_counter,
221 submill = EduModule(self.options)
223 if element.xpath('opis'):
224 opis = submill.generate(element.xpath('opis')[0])
228 n = element.xpath('wskazowki')
229 if n: wskazowki = submill.generate(n[0])
232 n = element.xpath('pomoce')
234 if n: pomoce = submill.generate(n[0])
237 forma = ''.join(element.xpath('forma/text()'))
239 czas = ''.join(element.xpath('czas/text()'))
241 counter = self.activity_counter
244 <cmd name="noindent" />
245 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
246 <cmd name="activityinfo"><parm>
247 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
248 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
249 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
258 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
259 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Return the ``"Pomoce: "`` label as opening text, with no closing.

    NOTE(review): presumably ``ifoption(sub_gen=True)`` limits this
    handler to runs where the ``sub_gen`` option is set -- confirm in
    xmlutils.
    """
    label = "Pomoce: "
    closing = ""
    return label, closing
265 def handle_czas(self, *_):
268 def handle_forma(self, *_):
271 def handle_lista(self, element, attrs={}):
272 ltype = element.attrib.get('typ', 'punkt')
273 if not element.findall("punkt"):
274 if ltype == 'czytelnia':
275 return 'W przygotowaniu.'
278 if ltype == 'slowniczek':
279 surl = element.attrib.get('src', None)
281 # print '** missing src on <slowniczek>, setting default'
282 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
285 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
286 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
288 listcmd = {'num': 'enumerate',
291 'slowniczek': 'itemize',
292 'czytelnia': 'itemize'}[ltype]
294 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Return TeXML for a list item.

    The opening markup is an ``item`` command; the closing markup is
    empty.  *element* is not inspected.
    """
    opening = '<cmd name="item"/>'
    closing = ''
    return opening, closing
299 def handle_cwiczenie(self, element):
300 exercise_handlers = {
302 'uporzadkuj': Uporzadkuj,
305 'przyporzadkuj': Przyporzadkuj,
306 'prawdafalsz': PrawdaFalsz
309 typ = element.attrib['typ']
310 self.exercise_counter += 1
311 if not typ in exercise_handlers:
312 return '(no handler)'
313 self.options = {'exercise_counter': self.exercise_counter}
314 handler = exercise_handlers[typ](self.options)
315 return handler.generate(element)
317 # XXX this is copied from pyhtml.py, except for return and
318 # should be refactored for no code duplication
319 def handle_definiendum(self, element):
320 nxt = element.getnext()
323 # let's pull definiens from another document
324 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
325 sxml = self.options['slowniczek_xml']
326 assert element.text != ''
327 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
329 definiens = defloc[0].getnext()
330 if definiens.tag == 'definiens':
331 subgen = EduModule(self.options)
332 definiens_s = subgen.generate(definiens)
334 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
336 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a TeXML ``figure`` environment.

    Returns the ``(opening, closing)`` markup pair; *element* is not
    inspected.
    """
    opening = u'<env name="figure">'
    closing = u"</env>"
    return opening, closing
342 def handle_tabela(self, element):
344 for w in element.xpath("wiersz"):
346 if max_col < len(ks):
348 self.options = {'columnts': max_col}
350 # has_frames = int(element.attrib.get("ramki", "0"))
351 # if has_frames: frames_c = "framed"
352 # else: frames_c = ""
353 # return u"""<table class="%s">""" % frames_c, u"</table>"
355 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
356 ''' % ('l' * max_col), \
357 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Return TeXML for a table row.

    Nothing is emitted before the row; after it comes a ``ctrl``
    element whose ``ch`` attribute is a single backslash.
    """
    opening = u""
    row_end = u'<ctrl ch="\\"/>'
    return opening, row_end
364 def handle_kol(self, element):
365 if element.getnext() is not None:
366 return u"", u'<spec cat="align" />'
369 def handle_link(self, element):
370 if element.attrib.get('url'):
371 url = element.attrib.get('url')
372 if url == element.text:
373 return cmd('url')(self, element)
375 return cmd('href', parms=[element.attrib['url']])(self, element)
377 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Attach the referenced image and emit the ``obraz`` command.

    Reads the image name from the element's ``nazwa`` attribute
    (whitespace-stripped, defaulting to an empty string), fetches it via
    the format object's ``get_image`` and stores it in the format's
    ``attachments`` under ``obraz/<name with underscores removed>``.
    Returns the result of the ``obraz`` command built with that path.
    """
    fmt = self.options['format']
    image_name = element.attrib.get('nazwa', '').strip()
    # Underscores are dropped from the attachment path only; the lookup
    # itself uses the name as given.
    attachment_path = "obraz/%s" % image_name.replace("_", "")
    fmt.attachments[attachment_path] = fmt.get_image(image_name)
    return cmd("obraz", parms=[attachment_path])(self)
387 def handle_video(self, element):
388 url = element.attrib.get('url')
390 print '!! <video> missing url'
392 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
394 print '!! unknown <video> url scheme:', url
397 thumb = IOFile.from_string(urlopen
398 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
399 img_path = "video/%s.jpg" % name.replace("_", "")
400 self.options['format'].attachments[img_path] = thumb
401 canon_url = "https://www.youtube.com/watch?v=%s" % name
402 return cmd("video", parms=[img_path, canon_url])(self)
405 class Exercise(EduModule):
406 def __init__(self, *args, **kw):
407 self.question_counter = 0
408 super(Exercise, self).__init__(*args, **kw)
410 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
412 def handle_cwiczenie(self, element):
414 'exercise': element.attrib['typ'],
417 self.question_counter = 0
418 self.piece_counter = 0
420 header = etree.Element("parm")
421 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
422 header_cmd.append(header)
423 header.text = u"Zadanie %d." % self.options['exercise_counter']
425 pre = etree.tostring(header_cmd, encoding=unicode)
427 # Add a single <pytanie> tag if it's not there
428 if not element.xpath(".//pytanie"):
429 qpre, qpost = self.handle_pytanie(element)
434 def handle_pytanie(self, element):
435 """This will handle <cwiczenie> element, when there is no <pytanie>
437 self.question_counter += 1
438 self.piece_counter = 0
440 if self.options['teacher'] and element.attrib.get('rozw'):
441 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
444 def handle_punkt(self, element):
445 pre, post = super(Exercise, self).handle_punkt(element)
446 if self.options['teacher'] and element.attrib.get('rozw'):
447 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
450 def solution_header(self):
451 par = etree.Element("cmd", name="par")
452 parm = etree.Element("parm")
453 parm.text = u"Rozwiązanie:"
455 return etree.tostring(par)
457 def explicit_solution(self):
458 if self.options['solution']:
459 par = etree.Element("cmd", name="par")
460 parm = etree.Element("parm")
461 parm.text = self.options['solution']
463 return self.solution_header() + etree.tostring(par)
467 class Wybor(Exercise):
468 def handle_cwiczenie(self, element):
469 pre, post = super(Wybor, self).handle_cwiczenie(element)
470 is_single_choice = True
471 pytania = element.xpath(".//pytanie")
475 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
476 if len(solutions) != 1:
477 is_single_choice = False
479 choices = p.xpath(".//*[@nazwa]")
481 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
482 if len(choices) != len(uniq):
483 is_single_choice = False
486 self.options = {'single': is_single_choice}
489 def handle_punkt(self, element):
490 if self.options['exercise'] and element.attrib.get('nazwa', None):
491 cmd = 'radio' if self.options['single'] else 'checkbox'
492 return u'<cmd name="%s"/>' % cmd, ''
494 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered for the ``uporzadkuj`` exercise type."""

    def handle_pytanie(self, element):
        """Handle a question by delegating to ``Exercise.handle_pytanie``.

        The previous implementation also ran an XPath query for
        ``.//punkt/@rozw`` and stored the result in a local that was never
        read; that dead computation has been dropped.  The return value is
        exactly what the parent implementation returns.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
503 class Przyporzadkuj(Exercise):
504 def handle_lista(self, lista):
505 header = etree.Element("parm")
506 header_cmd = etree.Element("cmd", name="par")
507 header_cmd.append(header)
508 if 'nazwa' in lista.attrib:
509 header.text = u"Kategorie:"
510 elif 'cel' in lista.attrib:
511 header.text = u"Elementy do przyporządkowania:"
513 header.text = u"Lista:"
514 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
515 pre = etree.tostring(header_cmd, encoding=unicode) + pre
519 class Luki(Exercise):
def find_pieces(self, question):
    """Return every ``luka`` descendant of *question*, found via XPath."""
    gaps = question.xpath(".//luka")
    return gaps
523 def solution(self, piece):
524 piece = deepcopy(piece)
527 return sub.generate(piece)
529 def handle_pytanie(self, element):
530 qpre, qpost = super(Luki, self).handle_pytanie(element)
532 luki = self.find_pieces(element)
534 self.words = u"<env name='itemize'>%s</env>" % (
535 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
539 def handle_opis(self, element):
540 return '', self.words
542 def handle_luka(self, element):
544 if self.options['teacher']:
545 piece = deepcopy(element)
548 text = sub.generate(piece)
549 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return every ``zastap`` descendant of *question*, found via XPath."""
    replacements = question.xpath(".//zastap")
    return replacements
def solution(self, piece):
    """Return the ``rozw`` attribute of *piece*, or ``''`` when absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the fixed heading text for the list of elements to insert."""
    heading = u"Elementy do wstawienia"
    return heading
563 def handle_zastap(self, element):
564 piece = deepcopy(element)
567 text = sub.generate(piece)
568 if self.options['teacher'] and element.attrib.get('rozw'):
569 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
573 class PrawdaFalsz(Exercise):
574 def handle_punkt(self, element):
575 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
576 if 'rozw' in element.attrib:
577 post += u" [Prawda/Fałsz]"
583 lists = tree.xpath(".//lista")
588 if p.tail is None: p.tail = ''
592 if p.text is None: p.text = ''
598 class EduModulePDFFormat(PDFFormat):
599 style = get_resource('res/styles/edumed/pdf/edumed.sty')
602 substitute_hyphens(self.wldoc.edoc)
603 fix_hanging(self.wldoc.edoc)
605 self.attachments = {}
609 "teacher": self.customization.get('teacher'),
611 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
613 open("/tmp/texml.xml", "w").write(texml)
616 def get_tex_dir(self):
617 temp = super(EduModulePDFFormat, self).get_tex_dir()
618 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
619 for name, iofile in self.attachments.items():
620 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the attachment stored under *name* in the source document.

    Indexes ``self.wldoc.source.attachments`` directly, so a missing
    name propagates whatever lookup error that container raises.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]