1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, ifoption, tag_open_close
22 from librarian import DCNS, get_resource, IOFile
23 from librarian import functions
24 from pdf import PDFFormat, substitute_hyphens, fix_hanging
29 def _wrap(*args, **kw):
30 value = f(*args, **kw)
32 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
34 if isinstance(value, list):
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.activity_last = None
73 self.exercise_counter = 0
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
79 self.register_text_filter(swap_endlines)
80 self.register_text_filter(functions.substitute_entities)
81 self.register_text_filter(mark_alien_characters)
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
89 def handle_rdf__RDF(self, _):
90 """skip metadata in generation"""
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + (rights_lic and u'<opt>%s</opt>' % rights_lic or '') + \
97 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) + \
101 def get_authors(self, element, which=None):
102 dc = self.options['wldoc'].book_info
104 authors = dc.authors_textbook + \
105 dc.authors_scenario + \
108 authors = getattr(dc, "authors_%s" % which)
109 return u', '.join(author.readable() for author in authors if author)
def get_title(self, element):
    """Look up the Dublin Core ``title`` field via ``get_dc`` with ``single=True``."""
    title = self.get_dc(element, 'title', single=True)
    return title
115 def handle_utwor(self, element):
118 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
120 \\documentclass[%s]{wl}
121 \\usepackage{style}''' % self.options['customization_str'],
122 self.options['has_cover'] and '\usepackage{makecover}',
123 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
124 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
125 (self.options['morefloats'] == 'none' and
126 u'''\\IfFileExists{morefloats.sty}{
127 \\usepackage{morefloats}
129 u'''\\def\\authors{%s}''' % self.get_authors(element),
130 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
131 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
132 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
134 u'''\\author{\\authors}''',
135 u'''\\title{%s}''' % self.get_title(element),
136 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
137 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
141 return u"".join(filter(None, lines)), u'</TeXML>'
144 def handle_powiesc(self, element):
146 <env name="document">
147 <cmd name="maketitle"/>
148 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element in a TeXML command named after its tag.

    The command name is whatever ``functions.texcommand(element.tag)``
    returns.  The result is the (opening, closing) pair of TeXML fragments.
    """
    # Called ``command`` so the module-level ``cmd`` factory is not shadowed.
    command = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
157 handle_akap_dialog = \
158 handle_autor_utworu = \
160 handle_didaskalia = \
161 handle_didask_tekst = \
162 handle_dlugi_cytat = \
163 handle_dzielo_nadrzedne = \
164 handle_lista_osoba = \
166 handle_miejsce_czas = \
168 handle_motto_podpis = \
169 handle_naglowek_akt = \
170 handle_naglowek_czesc = \
171 handle_naglowek_listy = \
172 handle_naglowek_osoba = \
173 handle_naglowek_scena = \
174 handle_nazwa_utworu = \
180 handle_poezja_cyt = \
183 handle_sekcja_asterysk = \
184 handle_sekcja_swiatlo = \
185 handle_separator_linia = \
186 handle_slowo_obce = \
188 handle_tytul_dziela = \
189 handle_wyroznienie = \
193 def handle_naglowek_rozdzial(self, element):
194 if not self.options['teacher']:
195 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
196 self.state['mute'] = False
198 self.state['mute'] = True
200 return self.handle_texcommand(element)
201 handle_naglowek_rozdzial.unmuter = True
203 def handle_naglowek_podrozdzial(self, element):
204 self.activity_counter = 0
205 if not self.options['teacher']:
206 if element.text.startswith(u'Dla ucznia'):
207 self.state['mute'] = False
209 elif element.text.startswith(u'Dla nauczyciela'):
210 self.state['mute'] = True
212 elif self.state['mute']:
214 return self.handle_texcommand(element)
215 handle_naglowek_podrozdzial.unmuter = True
217 def handle_uwaga(self, _e):
220 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Emit a TeXML ``spec`` element of category ``tilde``; the element is ignored."""
    tilde = '<spec cat="tilde" />'
    return tilde
226 _handle_strofa = cmd("strofa")
228 def handle_strofa(self, element):
229 self.options = {'strofa': True}
230 return self._handle_strofa(element)
232 def handle_aktywnosc(self, element):
233 self.activity_counter += 1
236 'activity_counter': self.activity_counter,
239 submill = EduModule(self.options, self.state)
241 if element.xpath('opis'):
242 opis = submill.generate(element.xpath('opis')[0])
246 n = element.xpath('wskazowki')
248 wskazowki = submill.generate(n[0])
251 n = element.xpath('pomoce')
254 pomoce = submill.generate(n[0])
258 forma = ''.join(element.xpath('forma/text()'))
260 czas = ''.join(element.xpath('czas/text()'))
262 counter = self.activity_counter
264 if element.getnext().tag == 'aktywnosc' or (len(self.activity_last) and self.activity_last.getnext() == element):
265 counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
269 self.activity_last = element
272 <cmd name="noindent" />
274 <cmd name="activityinfo"><parm>
275 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
276 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
277 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
286 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
287 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
289 @ifoption(sub_gen=True)
290 def handle_pomoce(self, _):
291 return "Pomoce: ", ""
293 def handle_czas(self, *_):
296 def handle_forma(self, *_):
299 def handle_lista(self, element, attrs=None):
300 ltype = element.attrib.get('typ', 'punkt')
301 if not element.findall("punkt"):
302 if ltype == 'czytelnia':
303 return 'W przygotowaniu.'
306 if ltype == 'slowniczek':
307 surl = element.attrib.get('src', None)
309 # print '** missing src on <slowniczek>, setting default'
310 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
311 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
312 self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
318 'slowniczek': 'itemize',
319 'czytelnia': 'itemize'
322 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Open a list entry with an ``item`` command; nothing is emitted after it."""
    opening = '<cmd name="item"/>'
    closing = ''
    return opening, closing
327 def handle_cwiczenie(self, element):
328 exercise_handlers = {
330 'uporzadkuj': Uporzadkuj,
333 'przyporzadkuj': Przyporzadkuj,
334 'prawdafalsz': PrawdaFalsz
337 typ = element.attrib['typ']
338 self.exercise_counter += 1
339 if typ not in exercise_handlers:
340 return '(no handler)'
341 self.options = {'exercise_counter': self.exercise_counter}
342 handler = exercise_handlers[typ](self.options, self.state)
343 return handler.generate(element)
# XXX: this is copied from pyhtml.py (only the return value differs)
# and should be refactored to remove the code duplication.
347 def handle_definiendum(self, element):
348 nxt = element.getnext()
351 # let's pull definiens from another document
352 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
353 sxml = self.options['slowniczek_xml']
354 assert element.text != ''
355 if "'" in (element.text or ''):
356 defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
358 defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
360 definiens = defloc[0].getnext()
361 if definiens.tag == 'definiens':
362 subgen = EduModule(self.options, self.state)
363 definiens_s = subgen.generate(definiens)
365 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
367 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a ``figure`` environment."""
    opening = u'<env name="figure">'
    closing = u'</env>'
    return opening, closing
373 def handle_tabela(self, element):
375 for w in element.xpath("wiersz"):
377 if max_col < len(ks):
379 self.options = {'columnts': max_col}
381 # has_frames = int(element.attrib.get("ramki", "0"))
382 # if has_frames: frames_c = "framed"
383 # else: frames_c = ""
384 # return u"""<table class="%s">""" % frames_c, u"</table>"
386 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
387 ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Close a table row with a ``ctrl`` element whose ``ch`` is a backslash."""
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
394 def handle_kol(self, element):
395 if element.getnext() is not None:
396 return u"", u'<spec cat="align" />'
399 def handle_link(self, element):
400 if element.attrib.get('url'):
401 url = element.attrib.get('url')
402 if url == element.text:
403 return cmd('url')(self, element)
405 return cmd('href', parms=[element.attrib['url']])(self, element)
407 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Attach the image named by the ``nazwa`` attribute and emit ``obraz``.

    The image is fetched from ``self.options['format']`` and stored in that
    object's ``attachments`` under ``obraz/<file name>``, with underscores
    removed from the file name.  An ``obraz`` command takes that path as
    its parameter.
    """
    pdf_format = self.options['format']
    source_name = element.attrib.get('nazwa', '').strip()
    image = pdf_format.get_image(source_name)
    # Keep only the last path component of the image's file name.
    base_name = image.get_filename().rsplit('/', 1)[-1]
    img_path = "obraz/%s" % base_name.replace("_", "")
    pdf_format.attachments[img_path] = image
    return cmd("obraz", parms=[img_path])(self)
418 def handle_video(self, element):
419 url = element.attrib.get('url')
421 print '!! <video> missing url'
423 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
425 print '!! unknown <video> url scheme:', url
428 thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
429 img_path = "video/%s.jpg" % name.replace("_", "")
430 self.options['format'].attachments[img_path] = thumb
431 canon_url = "https://www.youtube.com/watch?v=%s" % name
432 return cmd("video", parms=[img_path, canon_url])(self)
435 class Exercise(EduModule):
436 def __init__(self, *args, **kw):
437 self.question_counter = 0
438 super(Exercise, self).__init__(*args, **kw)
439 self.piece_counter = None
441 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
443 def handle_cwiczenie(self, element):
445 'exercise': element.attrib['typ'],
448 self.question_counter = 0
449 self.piece_counter = 0
451 header = etree.Element("parm")
452 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
453 header_cmd.append(header)
454 header.text = u"Zadanie %d." % self.options['exercise_counter']
456 pre = etree.tostring(header_cmd, encoding=unicode)
458 # Add a single <pytanie> tag if it's not there
459 if not element.xpath(".//pytanie"):
460 qpre, qpost = self.handle_pytanie(element)
465 def handle_pytanie(self, element):
466 """This will handle <cwiczenie> element, when there is no <pytanie>
468 self.question_counter += 1
469 self.piece_counter = 0
471 if self.options['teacher'] and element.attrib.get('rozw'):
472 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
475 def handle_punkt(self, element):
476 pre, post = super(Exercise, self).handle_punkt(element)
477 if self.options['teacher'] and element.attrib.get('rozw'):
478 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
481 def solution_header(self):
482 par = etree.Element("cmd", name="par")
483 parm = etree.Element("parm")
484 parm.text = u"Rozwiązanie:"
486 return etree.tostring(par)
488 def explicit_solution(self):
489 if self.options['solution']:
490 par = etree.Element("cmd", name="par")
491 parm = etree.Element("parm")
492 parm.text = self.options['solution']
494 return self.solution_header() + etree.tostring(par)
497 class Wybor(Exercise):
498 def handle_cwiczenie(self, element):
499 pre, post = super(Wybor, self).handle_cwiczenie(element)
500 is_single_choice = True
501 pytania = element.xpath(".//pytanie")
505 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
506 if len(solutions) != 1:
507 is_single_choice = False
509 choices = p.xpath(".//*[@nazwa]")
512 uniq.add(n.attrib.get('nazwa', ''))
513 if len(choices) != len(uniq):
514 is_single_choice = False
517 self.options = {'single': is_single_choice}
520 def handle_punkt(self, element):
521 if self.options['exercise'] and element.attrib.get('nazwa', None):
522 cmd = 'radio' if self.options['single'] else 'checkbox'
523 return u'<cmd name="%s"/>' % cmd, ''
525 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler used for ``typ="uporzadkuj"`` exercises."""

    def handle_pytanie(self, element):
        """Handle a question exactly as the base ``Exercise`` does.

        No ordering-specific processing happens here; the override only
        delegates to the parent implementation and returns its result.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
534 class Przyporzadkuj(Exercise):
535 def handle_lista(self, lista):
536 header = etree.Element("parm")
537 header_cmd = etree.Element("cmd", name="par")
538 header_cmd.append(header)
539 if 'nazwa' in lista.attrib:
540 header.text = u"Kategorie:"
541 elif 'cel' in lista.attrib:
542 header.text = u"Elementy do przyporządkowania:"
544 header.text = u"Lista:"
545 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
546 pre = etree.tostring(header_cmd, encoding=unicode) + pre
550 class Luki(Exercise):
551 def find_pieces(self, question):
552 return question.xpath(".//luka")
554 def solution(self, piece):
555 piece = deepcopy(piece)
558 return sub.generate(piece)
560 def handle_pytanie(self, element):
561 qpre, qpost = super(Luki, self).handle_pytanie(element)
563 luki = self.find_pieces(element)
565 self.words = u"<env name='itemize'>%s</env>" % (
566 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
570 def handle_opis(self, element):
571 return '', self.words
573 def handle_luka(self, element):
575 if self.options['teacher']:
576 piece = deepcopy(element)
579 text = sub.generate(piece)
580 luka += u" [rozwiązanie: %s]" % text
585 def find_pieces(self, question):
586 return question.xpath(".//zastap")
588 def solution(self, piece):
589 return piece.attrib.get('rozw', '')
591 def list_header(self):
592 return u"Elementy do wstawienia"
594 def handle_zastap(self, element):
595 piece = deepcopy(element)
598 text = sub.generate(piece)
599 if self.options['teacher'] and element.attrib.get('rozw'):
600 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
604 class PrawdaFalsz(Exercise):
605 def handle_punkt(self, element):
606 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
607 if 'rozw' in element.attrib:
608 post += u" [Prawda/Fałsz]"
613 lists = tree.xpath(".//lista")
630 class EduModulePDFFormat(PDFFormat):
631 style = get_resource('res/styles/edumed/pdf/edumed.sty')
634 substitute_hyphens(self.wldoc.edoc)
635 fix_hanging(self.wldoc.edoc)
637 self.attachments = {}
641 "teacher": self.customization.get('teacher'),
643 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
645 open("/tmp/texml.xml", "w").write(texml)
648 def get_tex_dir(self):
649 temp = super(EduModulePDFFormat, self).get_tex_dir()
650 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
651 for name, iofile in self.attachments.items():
652 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the entry stored under ``name`` in ``self.wldoc.source.attachments``."""
    attachments = self.wldoc.source.attachments
    return attachments[name]