1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
89 def handle_rdf__RDF(self, _):
90 "skip metadata in generation"
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + \
97 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
98 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
102 def get_authors(self, element, which=None):
103 dc = self.options['wldoc'].book_info
105 authors = dc.authors_textbook + \
106 dc.authors_scenario + \
109 authors = getattr(dc, "authors_%s" % which)
110 return u', '.join(author.readable() for author in authors if author)
def get_title(self, element):
    """Return the document title.

    Delegates to ``self.get_dc`` for the Dublin Core ``title`` field,
    passing ``True`` as the ``single`` argument.
    """
    title = self.get_dc(element, 'title', True)
    return title
116 def handle_utwor(self, element):
119 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
121 \\documentclass[%s]{wl}
122 \\usepackage{style}''' % self.options['customization_str'],
123 self.options['has_cover'] and '\usepackage{makecover}',
124 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
125 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
126 (self.options['morefloats'] == 'none' and
127 u'''\\IfFileExists{morefloats.sty}{
128 \\usepackage{morefloats}
130 u'''\\def\\authors{%s}''' % self.get_authors(element),
131 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
132 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
133 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
135 u'''\\author{\\authors}''',
136 u'''\\title{%s}''' % self.get_title(element),
137 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
138 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
141 return u"".join(filter(None, lines)), u'</TeXML>'
145 def handle_powiesc(self, element):
147 <env name="document">
148 <cmd name="maketitle"/>
149 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element in a TeX command derived from its tag.

    The command name is whatever ``functions.texcommand(element.tag)``
    returns.  The result is an (opening, closing) pair of TeXML fragments.
    """
    # Named ``tex_name`` rather than ``cmd`` so the module-level ``cmd``
    # helper is not shadowed inside this method.
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
160 handle_akap_dialog = \
161 handle_akap_dialog = \
162 handle_autor_utworu = \
164 handle_didaskalia = \
165 handle_didask_tekst = \
166 handle_dlugi_cytat = \
167 handle_dzielo_nadrzedne = \
168 handle_lista_osoba = \
170 handle_miejsce_czas = \
172 handle_motto_podpis = \
173 handle_naglowek_akt = \
174 handle_naglowek_czesc = \
175 handle_naglowek_listy = \
176 handle_naglowek_osoba = \
177 handle_naglowek_scena = \
178 handle_nazwa_utworu = \
184 handle_poezja_cyt = \
187 handle_sekcja_asterysk = \
188 handle_sekcja_swiatlo = \
189 handle_separator_linia = \
190 handle_slowo_obce = \
192 handle_tytul_dziela = \
193 handle_wyroznienie = \
197 def handle_naglowek_rozdzial(self, element):
198 if not self.options['teacher']:
199 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
200 self.state['mute'] = False
202 self.state['mute'] = True
204 return self.handle_texcommand(element)
205 handle_naglowek_rozdzial.unmuter = True
207 def handle_naglowek_podrozdzial(self, element):
208 self.activity_counter = 0
209 if not self.options['teacher']:
210 if element.text.startswith(u'Dla ucznia'):
211 self.state['mute'] = False
213 elif element.text.startswith(u'Dla nauczyciela'):
214 self.state['mute'] = True
216 elif self.state['mute']:
218 return self.handle_texcommand(element)
219 handle_naglowek_podrozdzial.unmuter = True
221 def handle_uwaga(self, _e):
223 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Return the TeXML ``<spec cat="tilde" />`` fragment.

    The element argument is ignored.
    """
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
229 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Flag the ``strofa`` option, then delegate to ``_handle_strofa``.

    NOTE(review): assigning a dict to ``self.options`` presumably goes
    through a setter on the base class that layers these values over the
    current options rather than replacing them -- confirm against Xmill.
    """
    strofa_flag = {'strofa': True}
    self.options = strofa_flag
    return self._handle_strofa(element)
235 def handle_aktywnosc(self, element):
236 self.activity_counter += 1
239 'activity_counter': self.activity_counter,
242 submill = EduModule(self.options, self.state)
244 if element.xpath('opis'):
245 opis = submill.generate(element.xpath('opis')[0])
249 n = element.xpath('wskazowki')
250 if n: wskazowki = submill.generate(n[0])
253 n = element.xpath('pomoce')
255 if n: pomoce = submill.generate(n[0])
258 forma = ''.join(element.xpath('forma/text()'))
260 czas = ''.join(element.xpath('czas/text()'))
262 counter = self.activity_counter
264 if element.getnext().tag == 'aktywnosc' or self.activity_last.getnext() == element:
265 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
269 self.activity_last = element
272 <cmd name="noindent" />
274 <cmd name="activityinfo"><parm>
275 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
276 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
277 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
286 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
287 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Return the (prefix, suffix) pair that labels the ``pomoce`` content.

    The prefix is the literal label text; the suffix is empty.
    """
    label = "Pomoce: "
    return label, ""
293 def handle_czas(self, *_):
296 def handle_forma(self, *_):
299 def handle_lista(self, element, attrs={}):
300 ltype = element.attrib.get('typ', 'punkt')
301 if not element.findall("punkt"):
302 if ltype == 'czytelnia':
303 return 'W przygotowaniu.'
306 if ltype == 'slowniczek':
307 surl = element.attrib.get('src', None)
309 # print '** missing src on <slowniczek>, setting default'
310 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
311 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
312 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
314 listcmd = {'num': 'enumerate',
317 'slowniczek': 'itemize',
318 'czytelnia': 'itemize'}[ltype]
320 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Return the (opening, closing) TeXML pair for a list item.

    The opening part is an ``item`` command; nothing is emitted after the
    item's content.  The element itself is not inspected.
    """
    item_cmd = '<cmd name="item"/>'
    return item_cmd, ''
325 def handle_cwiczenie(self, element):
326 exercise_handlers = {
328 'uporzadkuj': Uporzadkuj,
331 'przyporzadkuj': Przyporzadkuj,
332 'prawdafalsz': PrawdaFalsz
335 typ = element.attrib['typ']
336 self.exercise_counter += 1
337 if not typ in exercise_handlers:
338 return '(no handler)'
339 self.options = {'exercise_counter': self.exercise_counter}
340 handler = exercise_handlers[typ](self.options, self.state)
341 return handler.generate(element)
# XXX: copied from pyhtml.py (only the return value differs);
# should be refactored to remove the code duplication.
345 def handle_definiendum(self, element):
346 nxt = element.getnext()
349 # let's pull definiens from another document
350 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
351 sxml = self.options['slowniczek_xml']
352 assert element.text != ''
353 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
355 definiens = defloc[0].getnext()
356 if definiens.tag == 'definiens':
357 subgen = EduModule(self.options, self.state)
358 definiens_s = subgen.generate(definiens)
360 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
362 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Return the (opening, closing) TeXML pair of a ``figure`` environment.

    The element itself is not inspected.
    """
    env_open = u"""<env name="figure">"""
    env_close = u"</env>"
    return env_open, env_close
368 def handle_tabela(self, element):
370 for w in element.xpath("wiersz"):
372 if max_col < len(ks):
374 self.options = {'columnts': max_col}
376 # has_frames = int(element.attrib.get("ramki", "0"))
377 # if has_frames: frames_c = "framed"
378 # else: frames_c = ""
379 # return u"""<table class="%s">""" % frames_c, u"</table>"
381 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
382 ''' % ('l' * max_col), \
383 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Return the (opening, closing) TeXML pair for a table row.

    Nothing is emitted before the row; a backslash ``ctrl`` element is
    emitted after it.  The element itself is not inspected.
    """
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
390 def handle_kol(self, element):
391 if element.getnext() is not None:
392 return u"", u'<spec cat="align" />'
395 def handle_link(self, element):
396 if element.attrib.get('url'):
397 url = element.attrib.get('url')
398 if url == element.text:
399 return cmd('url')(self, element)
401 return cmd('href', parms=[element.attrib['url']])(self, element)
403 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Register the image named by the element and emit an ``obraz`` command.

    The image is fetched from the format object found under
    ``self.options['format']`` using the stripped ``nazwa`` attribute,
    stored in that object's ``attachments`` under an ``obraz/...`` path,
    and that path is passed as the parameter of the ``obraz`` command.
    """
    pdf_format = self.options['format']
    requested = element.attrib.get('nazwa', '').strip()
    image = pdf_format.get_image(requested)
    # Only the last path component of the image's filename is kept, and
    # underscores are removed from it when building the attachment path.
    basename = image.get_filename().rsplit('/', 1)[-1]
    img_path = "obraz/%s" % basename.replace("_", "")
    pdf_format.attachments[img_path] = image
    return cmd("obraz", parms=[img_path])(self)
414 def handle_video(self, element):
415 url = element.attrib.get('url')
417 print '!! <video> missing url'
419 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
421 print '!! unknown <video> url scheme:', url
424 thumb = IOFile.from_string(urlopen
425 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
426 img_path = "video/%s.jpg" % name.replace("_", "")
427 self.options['format'].attachments[img_path] = thumb
428 canon_url = "https://www.youtube.com/watch?v=%s" % name
429 return cmd("video", parms=[img_path, canon_url])(self)
432 class Exercise(EduModule):
def __init__(self, *args, **kw):
    """Zero the question counter, then run the base-class initialiser.

    All positional and keyword arguments are forwarded unchanged.
    """
    # Set before delegating; the order relative to the base initialiser
    # is kept exactly as written.
    self.question_counter = 0
    super(Exercise, self).__init__(*args, **kw)
437 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
439 def handle_cwiczenie(self, element):
441 'exercise': element.attrib['typ'],
444 self.question_counter = 0
445 self.piece_counter = 0
447 header = etree.Element("parm")
448 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
449 header_cmd.append(header)
450 header.text = u"Zadanie %d." % self.options['exercise_counter']
452 pre = etree.tostring(header_cmd, encoding=unicode)
454 # Add a single <pytanie> tag if it's not there
455 if not element.xpath(".//pytanie"):
456 qpre, qpost = self.handle_pytanie(element)
461 def handle_pytanie(self, element):
462 """This will handle <cwiczenie> element, when there is no <pytanie>
464 self.question_counter += 1
465 self.piece_counter = 0
467 if self.options['teacher'] and element.attrib.get('rozw'):
468 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
471 def handle_punkt(self, element):
472 pre, post = super(Exercise, self).handle_punkt(element)
473 if self.options['teacher'] and element.attrib.get('rozw'):
474 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
477 def solution_header(self):
478 par = etree.Element("cmd", name="par")
479 parm = etree.Element("parm")
480 parm.text = u"Rozwiązanie:"
482 return etree.tostring(par)
484 def explicit_solution(self):
485 if self.options['solution']:
486 par = etree.Element("cmd", name="par")
487 parm = etree.Element("parm")
488 parm.text = self.options['solution']
490 return self.solution_header() + etree.tostring(par)
494 class Wybor(Exercise):
495 def handle_cwiczenie(self, element):
496 pre, post = super(Wybor, self).handle_cwiczenie(element)
497 is_single_choice = True
498 pytania = element.xpath(".//pytanie")
502 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
503 if len(solutions) != 1:
504 is_single_choice = False
506 choices = p.xpath(".//*[@nazwa]")
508 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
509 if len(choices) != len(uniq):
510 is_single_choice = False
513 self.options = {'single': is_single_choice}
516 def handle_punkt(self, element):
517 if self.options['exercise'] and element.attrib.get('nazwa', None):
518 cmd = 'radio' if self.options['single'] else 'checkbox'
519 return u'<cmd name="%s"/>' % cmd, ''
521 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered under the ``'uporzadkuj'`` type key."""

    def handle_pytanie(self, element):
        """Handle a question exactly as the base ``Exercise`` does.

        The previous body also evaluated the XPath ``.//punkt/@rozw`` into
        a local that was never read; that dead query has been removed.

        Returns whatever ``Exercise.handle_pytanie`` returns for *element*.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
530 class Przyporzadkuj(Exercise):
531 def handle_lista(self, lista):
532 header = etree.Element("parm")
533 header_cmd = etree.Element("cmd", name="par")
534 header_cmd.append(header)
535 if 'nazwa' in lista.attrib:
536 header.text = u"Kategorie:"
537 elif 'cel' in lista.attrib:
538 header.text = u"Elementy do przyporządkowania:"
540 header.text = u"Lista:"
541 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
542 pre = etree.tostring(header_cmd, encoding=unicode) + pre
546 class Luki(Exercise):
def find_pieces(self, question):
    """Return every ``luka`` descendant of *question* (XPath ``.//luka``)."""
    gaps = question.xpath(".//luka")
    return gaps
550 def solution(self, piece):
551 piece = deepcopy(piece)
554 return sub.generate(piece)
556 def handle_pytanie(self, element):
557 qpre, qpost = super(Luki, self).handle_pytanie(element)
559 luki = self.find_pieces(element)
561 self.words = u"<env name='itemize'>%s</env>" % (
562 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
def handle_opis(self, element):
    """Return an empty opening part and ``self.words`` as the closing part.

    The element itself is not inspected.
    """
    closing = self.words
    return '', closing
569 def handle_luka(self, element):
571 if self.options['teacher']:
572 piece = deepcopy(element)
575 text = sub.generate(piece)
576 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return every ``zastap`` descendant of *question* (XPath ``.//zastap``)."""
    replacements = question.xpath(".//zastap")
    return replacements
def solution(self, piece):
    """Return the ``rozw`` attribute of *piece*, or ``''`` when it is absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the fixed heading text for the list of pieces to insert."""
    heading = u"Elementy do wstawienia"
    return heading
590 def handle_zastap(self, element):
591 piece = deepcopy(element)
594 text = sub.generate(piece)
595 if self.options['teacher'] and element.attrib.get('rozw'):
596 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
600 class PrawdaFalsz(Exercise):
601 def handle_punkt(self, element):
602 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
603 if 'rozw' in element.attrib:
604 post += u" [Prawda/Fałsz]"
610 lists = tree.xpath(".//lista")
615 if p.tail is None: p.tail = ''
619 if p.text is None: p.text = ''
625 class EduModulePDFFormat(PDFFormat):
626 style = get_resource('res/styles/edumed/pdf/edumed.sty')
629 substitute_hyphens(self.wldoc.edoc)
630 fix_hanging(self.wldoc.edoc)
632 self.attachments = {}
636 "teacher": self.customization.get('teacher'),
638 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
640 open("/tmp/texml.xml", "w").write(texml)
643 def get_tex_dir(self):
644 temp = super(EduModulePDFFormat, self).get_tex_dir()
645 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
646 for name, iofile in self.attachments.items():
647 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the attachment stored under *name* on the document source.

    The lookup is a plain subscript on ``self.wldoc.source.attachments``,
    so a missing name raises whatever that container raises.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]