1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it through XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
89 def handle_rdf__RDF(self, _):
90 "skip metadata in generation"
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + \
97 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
98 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
102 def get_authors(self, element, which=None):
103 dc = self.options['wldoc'].book_info
105 authors = dc.authors_textbook + \
106 dc.authors_scenario + \
109 authors = getattr(dc, "authors_%s" % which)
110 return u', '.join(author.readable() for author in authors if author)
def get_title(self, element):
    """Return the document title read from the Dublin Core metadata.

    The lookup is delegated to ``get_dc`` for the ``title`` field, with
    the ``single`` argument set to ``True``.
    """
    title = self.get_dc(element, 'title', True)
    return title
116 def handle_utwor(self, element):
119 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
121 \\documentclass[%s]{wl}
122 \\usepackage{style}''' % self.options['customization_str'],
123 self.options['has_cover'] and '\usepackage{makecover}',
124 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
125 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
126 (self.options['morefloats'] == 'none' and
127 u'''\\IfFileExists{morefloats.sty}{
128 \\usepackage{morefloats}
130 u'''\\def\\authors{%s}''' % self.get_authors(element),
131 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
132 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
133 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
135 u'''\\author{\\authors}''',
136 u'''\\title{%s}''' % self.get_title(element),
137 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
138 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
141 return u"".join(filter(None, lines)), u'</TeXML>'
145 def handle_powiesc(self, element):
147 <env name="document">
148 <cmd name="maketitle"/>
149 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element in the TeX command derived from its tag.

    The command name comes from ``functions.texcommand(element.tag)``.
    Returns a ``(prefix, suffix)`` pair of TeXML fragments: the prefix
    opens ``<TeXML escape="1">``, the ``cmd`` and its ``parm``; the suffix
    closes all three.
    """
    # Named differently from the module-level ``cmd`` factory on purpose,
    # so the local does not shadow it.
    command_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
160 handle_akap_dialog = \
161 handle_akap_dialog = \
162 handle_autor_utworu = \
164 handle_didaskalia = \
165 handle_didask_tekst = \
166 handle_dlugi_cytat = \
167 handle_dzielo_nadrzedne = \
168 handle_lista_osoba = \
170 handle_miejsce_czas = \
172 handle_motto_podpis = \
173 handle_naglowek_akt = \
174 handle_naglowek_czesc = \
175 handle_naglowek_listy = \
176 handle_naglowek_osoba = \
177 handle_naglowek_scena = \
178 handle_nazwa_utworu = \
184 handle_poezja_cyt = \
187 handle_sekcja_asterysk = \
188 handle_sekcja_swiatlo = \
189 handle_separator_linia = \
190 handle_slowo_obce = \
192 handle_tytul_dziela = \
193 handle_wyroznienie = \
197 def handle_naglowek_rozdzial(self, element):
198 if not self.options['teacher']:
199 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
200 self.state['mute'] = False
202 self.state['mute'] = True
204 return self.handle_texcommand(element)
205 handle_naglowek_rozdzial.unmuter = True
207 def handle_naglowek_podrozdzial(self, element):
208 self.activity_counter = 0
209 if not self.options['teacher']:
210 if element.text.startswith(u'Dla ucznia'):
211 self.state['mute'] = False
213 elif element.text.startswith(u'Dla nauczyciela'):
214 self.state['mute'] = True
216 elif self.state['mute']:
218 return self.handle_texcommand(element)
219 handle_naglowek_podrozdzial.unmuter = True
221 def handle_uwaga(self, _e):
223 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Emit the TeXML ``spec`` element of category ``tilde``.

    The element argument is ignored; the same markup is returned for
    every call.
    """
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
# Handler built by cmd() for the "strofa" command. It is kept under a private
# name because handle_strofa sets the 'strofa' option before delegating to it.
_handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Render a stanza: set the ``strofa`` option, then emit the command.

    ``strofa`` is the option checked by the end-of-line text filter that
    ``__init__`` registers. The actual markup comes from the private
    ``_handle_strofa`` handler.
    """
    # NOTE(review): presumably ``options`` is a property of the base class
    # whose setter layers these values over the current ones -- confirm.
    stanza_options = {'strofa': True}
    self.options = stanza_options
    return self._handle_strofa(element)
235 def handle_aktywnosc(self, element):
236 self.activity_counter += 1
239 'activity_counter': self.activity_counter,
242 submill = EduModule(self.options, self.state)
244 if element.xpath('opis'):
245 opis = submill.generate(element.xpath('opis')[0])
249 n = element.xpath('wskazowki')
250 if n: wskazowki = submill.generate(n[0])
253 n = element.xpath('pomoce')
255 if n: pomoce = submill.generate(n[0])
258 forma = ''.join(element.xpath('forma/text()'))
260 czas = ''.join(element.xpath('czas/text()'))
262 counter = self.activity_counter
264 if element.getnext().tag == 'aktywnosc' or self.activity_last.getnext() == element:
265 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
269 self.activity_last = element
272 <cmd name="noindent" />
274 <cmd name="activityinfo"><parm>
275 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
276 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
277 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
# Handlers for <opis> and <wskazowki>: both return an empty prefix and an
# empty suffix, and both are wrapped with ifoption(sub_gen=True).
# NOTE(review): presumably ifoption activates a handler only when the given
# option has the given value -- confirm in xmlutils.
handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Put the "Pomoce: " label in front of the element's content.

    Returns a ``(prefix, suffix)`` pair; the suffix is empty. The element
    argument is not used.
    """
    label = "Pomoce: "
    return label, ""
293 def handle_czas(self, *_):
296 def handle_forma(self, *_):
299 def handle_lista(self, element, attrs={}):
300 ltype = element.attrib.get('typ', 'punkt')
301 if not element.findall("punkt"):
302 if ltype == 'czytelnia':
303 return 'W przygotowaniu.'
306 if ltype == 'slowniczek':
307 surl = element.attrib.get('src', None)
309 # print '** missing src on <slowniczek>, setting default'
310 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
311 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
312 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
314 listcmd = {'num': 'enumerate',
317 'slowniczek': 'itemize',
318 'czytelnia': 'itemize'}[ltype]
320 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Start a list item.

    Returns a ``(prefix, suffix)`` pair: the prefix is the TeXML ``item``
    command, the suffix is empty. The element itself is not inspected.
    """
    item_cmd = '<cmd name="item"/>'
    return item_cmd, ''
325 def handle_cwiczenie(self, element):
326 exercise_handlers = {
328 'uporzadkuj': Uporzadkuj,
331 'przyporzadkuj': Przyporzadkuj,
332 'prawdafalsz': PrawdaFalsz
335 typ = element.attrib['typ']
336 self.exercise_counter += 1
337 if not typ in exercise_handlers:
338 return '(no handler)'
339 self.options = {'exercise_counter': self.exercise_counter}
340 handler = exercise_handlers[typ](self.options, self.state)
341 return handler.generate(element)
# XXX: this is copied from pyhtml.py (only the return value differs) and
# should be refactored to remove the code duplication.
345 def handle_definiendum(self, element):
346 nxt = element.getnext()
349 # let's pull definiens from another document
350 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
351 sxml = self.options['slowniczek_xml']
352 assert element.text != ''
353 if "'" in (element.text or ''):
354 defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
356 defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
358 definiens = defloc[0].getnext()
359 if definiens.tag == 'definiens':
360 subgen = EduModule(self.options, self.state)
361 definiens_s = subgen.generate(definiens)
363 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
365 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element in a TeXML ``figure`` environment.

    Returns the opening and closing ``env`` tags as a
    ``(prefix, suffix)`` pair.
    """
    env_open = u"""<env name="figure">"""
    env_close = u"</env>"
    return env_open, env_close
371 def handle_tabela(self, element):
373 for w in element.xpath("wiersz"):
375 if max_col < len(ks):
377 self.options = {'columnts': max_col}
379 # has_frames = int(element.attrib.get("ramki", "0"))
380 # if has_frames: frames_c = "framed"
381 # else: frames_c = ""
382 # return u"""<table class="%s">""" % frames_c, u"</table>"
384 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
385 ''' % ('l' * max_col), \
386 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Terminate a table row.

    Nothing is emitted before the row; after it comes a TeXML ``ctrl``
    element whose ``ch`` attribute is a single backslash.
    """
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
393 def handle_kol(self, element):
394 if element.getnext() is not None:
395 return u"", u'<spec cat="align" />'
398 def handle_link(self, element):
399 if element.attrib.get('url'):
400 url = element.attrib.get('url')
401 if url == element.text:
402 return cmd('url')(self, element)
404 return cmd('href', parms=[element.attrib['url']])(self, element)
406 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Attach the image named by the ``nazwa`` attribute and emit ``obraz``.

    The image is fetched from the format object stored in the ``format``
    option, registered in that object's ``attachments`` under an
    ``obraz/...`` path, and the path is passed as the parameter of the
    ``obraz`` command.
    """
    pdf_format = self.options['format']
    requested = element.attrib.get('nazwa', '').strip()
    image = pdf_format.get_image(requested)
    # Only the last path component of the stored file name is kept, and
    # underscores are removed from it.
    basename = image.get_filename().rsplit('/', 1)[-1]
    img_path = "obraz/%s" % basename.replace("_", "")
    pdf_format.attachments[img_path] = image
    return cmd("obraz", parms=[img_path])(self)
417 def handle_video(self, element):
418 url = element.attrib.get('url')
420 print '!! <video> missing url'
422 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
424 print '!! unknown <video> url scheme:', url
427 thumb = IOFile.from_string(urlopen
428 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
429 img_path = "video/%s.jpg" % name.replace("_", "")
430 self.options['format'].attachments[img_path] = thumb
431 canon_url = "https://www.youtube.com/watch?v=%s" % name
432 return cmd("video", parms=[img_path, canon_url])(self)
435 class Exercise(EduModule):
def __init__(self, *args, **kw):
    """Reset the question counter and initialise the base module.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor.
    """
    # Set before the base constructor runs, as in the original ordering.
    self.question_counter = 0
    super(Exercise, self).__init__(*args, **kw)
# <rozw_kom> is rendered with the "akap" command, wrapped in
# ifoption(teacher=True).
# NOTE(review): presumably this makes the handler active only when the
# 'teacher' option is true -- confirm in xmlutils.
handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
442 def handle_cwiczenie(self, element):
444 'exercise': element.attrib['typ'],
447 self.question_counter = 0
448 self.piece_counter = 0
450 header = etree.Element("parm")
451 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
452 header_cmd.append(header)
453 header.text = u"Zadanie %d." % self.options['exercise_counter']
455 pre = etree.tostring(header_cmd, encoding=unicode)
457 # Add a single <pytanie> tag if it's not there
458 if not element.xpath(".//pytanie"):
459 qpre, qpost = self.handle_pytanie(element)
464 def handle_pytanie(self, element):
465 """This will handle <cwiczenie> element, when there is no <pytanie>
467 self.question_counter += 1
468 self.piece_counter = 0
470 if self.options['teacher'] and element.attrib.get('rozw'):
471 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
474 def handle_punkt(self, element):
475 pre, post = super(Exercise, self).handle_punkt(element)
476 if self.options['teacher'] and element.attrib.get('rozw'):
477 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
480 def solution_header(self):
481 par = etree.Element("cmd", name="par")
482 parm = etree.Element("parm")
483 parm.text = u"Rozwiązanie:"
485 return etree.tostring(par)
487 def explicit_solution(self):
488 if self.options['solution']:
489 par = etree.Element("cmd", name="par")
490 parm = etree.Element("parm")
491 parm.text = self.options['solution']
493 return self.solution_header() + etree.tostring(par)
497 class Wybor(Exercise):
498 def handle_cwiczenie(self, element):
499 pre, post = super(Wybor, self).handle_cwiczenie(element)
500 is_single_choice = True
501 pytania = element.xpath(".//pytanie")
505 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
506 if len(solutions) != 1:
507 is_single_choice = False
509 choices = p.xpath(".//*[@nazwa]")
511 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
512 if len(choices) != len(uniq):
513 is_single_choice = False
516 self.options = {'single': is_single_choice}
519 def handle_punkt(self, element):
520 if self.options['exercise'] and element.attrib.get('nazwa', None):
521 cmd = 'radio' if self.options['single'] else 'checkbox'
522 return u'<cmd name="%s"/>' % cmd, ''
524 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Handler for exercises whose ``typ`` is ``uporzadkuj``."""

    def handle_pytanie(self, element):
        """Render the question exactly as the generic ``Exercise`` does.

        The previous implementation also ran an XPath query for the
        ``rozw`` attributes of the nested ``punkt`` elements, but never
        used the result. That dead query has been removed.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
533 class Przyporzadkuj(Exercise):
534 def handle_lista(self, lista):
535 header = etree.Element("parm")
536 header_cmd = etree.Element("cmd", name="par")
537 header_cmd.append(header)
538 if 'nazwa' in lista.attrib:
539 header.text = u"Kategorie:"
540 elif 'cel' in lista.attrib:
541 header.text = u"Elementy do przyporządkowania:"
543 header.text = u"Lista:"
544 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
545 pre = etree.tostring(header_cmd, encoding=unicode) + pre
549 class Luki(Exercise):
550 def find_pieces(self, question):
551 return question.xpath(".//luka")
553 def solution(self, piece):
554 piece = deepcopy(piece)
557 return sub.generate(piece)
559 def handle_pytanie(self, element):
560 qpre, qpost = super(Luki, self).handle_pytanie(element)
562 luki = self.find_pieces(element)
564 self.words = u"<env name='itemize'>%s</env>" % (
565 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
569 def handle_opis(self, element):
570 return '', self.words
572 def handle_luka(self, element):
574 if self.options['teacher']:
575 piece = deepcopy(element)
578 text = sub.generate(piece)
579 luka += u" [rozwiązanie: %s]" % text
584 def find_pieces(self, question):
585 return question.xpath(".//zastap")
587 def solution(self, piece):
588 return piece.attrib.get('rozw', '')
590 def list_header(self):
591 return u"Elementy do wstawienia"
593 def handle_zastap(self, element):
594 piece = deepcopy(element)
597 text = sub.generate(piece)
598 if self.options['teacher'] and element.attrib.get('rozw'):
599 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
603 class PrawdaFalsz(Exercise):
604 def handle_punkt(self, element):
605 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
606 if 'rozw' in element.attrib:
607 post += u" [Prawda/Fałsz]"
613 lists = tree.xpath(".//lista")
618 if p.tail is None: p.tail = ''
622 if p.text is None: p.text = ''
628 class EduModulePDFFormat(PDFFormat):
629 style = get_resource('res/styles/edumed/pdf/edumed.sty')
632 substitute_hyphens(self.wldoc.edoc)
633 fix_hanging(self.wldoc.edoc)
635 self.attachments = {}
639 "teacher": self.customization.get('teacher'),
641 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
643 open("/tmp/texml.xml", "w").write(texml)
646 def get_tex_dir(self):
647 temp = super(EduModulePDFFormat, self).get_tex_dir()
648 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
649 for name, iofile in self.attachments.items():
650 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the attachment of the source document stored under ``name``.

    The lookup is a plain subscript on ``self.wldoc.source.attachments``,
    so an unknown name raises whatever that container raises.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]