1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
8 from tempfile import mkdtemp, NamedTemporaryFile
10 from lxml import etree
11 from subprocess import call
13 from librarian import IOFile, Format, ParseError, get_resource
14 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
15 from librarian import functions
18 from copy import deepcopy
# Value written (as a string) to the "width" attribute of the <img> element
# built in handle_obraz.  Presumably pixels -- TODO confirm.
IMAGE_THUMB_WIDTH = 300
23 class EduModule(Xmill):
24 def __init__(self, options=None):
25 super(EduModule, self).__init__(options)
26 self.activity_counter = 0
27 self.activity_last = None
28 self.exercise_counter = 0
31 def swap_endlines(txt):
32 if self.options['strofa']:
33 txt = txt.replace("/\n", "<br/>\n")
35 self.register_text_filter(functions.substitute_entities)
36 self.register_escaped_text_filter(swap_endlines)
38 @tagged('div', 'stanza')
39 def handle_strofa(self, element):
40 self.options = {'strofa': True}
43 def handle_powiesc(self, element):
44 return u"""<!DOCTYPE html>
47 <meta http-equiv="content-type" content="text/html; charset=UTF-8">
48 <title>Edukacja medialna</title>
49 <link href="weasy.css" rel="stylesheet" type="text/css">
50 <meta charset="UTF-8">
53 <div class="module" id="book-text">
# Declarative handlers: each source element listed here is rendered by a
# callable produced by `tag(...)` (imported from xmlutils).  The first
# argument is the HTML tag name; the optional second argument is presumably
# the CSS class put on that tag -- confirm against xmlutils.tag.
handle_autor_utworu = tag("span", "author")
handle_dzielo_nadrzedne = tag("span", "collection")
handle_podtytul = tag("span", "subtitle")
# These three names share one and the same handler object.
handle_naglowek_akt = handle_naglowek_czesc = handle_srodtytul = tag("h2")
handle_naglowek_scena = tag('h2')
handle_naglowek_osoba = tag('h3')
# All three paragraph variants are rendered the same way.
handle_akap = handle_akap_dialog = handle_akap_cd = tag('p', 'paragraph')

# Inline emphasis variants, all rendered as <em>.
handle_wyroznienie = tag('em')
handle_tytul_dziela = tag('em', 'title')
handle_slowo_obce = tag('em', 'foreign')
def naglowek_to_anchor(self, naglowek):
    """Return the anchor for the header element `naglowek`.

    The work is delegated to the `urlmapper` object found in self.options;
    its result is returned unchanged.
    """
    mapper = self.options['urlmapper']
    return mapper.naglowek_to_anchor(naglowek)
def handle_nazwa_utworu(self, element):
    """Return the opening and closing markup of the top <h1> title heading.

    `element` is not inspected; the pair is constant.
    """
    opening = "<h1 class='title' id='top'>"
    closing = "</h1>"
    return opening, closing
def handle_naglowek_rozdzial(self, element):
    """Return what `tag_open_close` builds for an <h2> carrying an id.

    The id is the anchor computed for `element` by self.naglowek_to_anchor.
    """
    anchor = self.naglowek_to_anchor(element)
    return tag_open_close("h2", id=anchor)
def handle_naglowek_podrozdzial(self, element):
    """Render a sub-chapter header as <h3> and restart activity numbering.

    Every sub-chapter header resets self.activity_counter to 0.  A header
    whose stripped text is exactly u'Przebieg zajęć' is rendered with the
    'activities-header' argument passed to `tag`; any other header gets a
    plain <h3>.

    Returns whatever the handler built by `tag(...)` returns for `element`.
    """
    self.activity_counter = 0
    # element.text is None when the header has no leading text (for example
    # when it starts with a child element); treat that as an empty title
    # instead of raising AttributeError on None.strip().
    heading = (element.text or u'').strip()
    if heading == u'Przebieg zajęć':
        return tag('h3', 'activities-header')(self, element)
    return tag('h3')(self, element)
86 def handle_uwaga(self, _e):
89 def handle_aktywnosc(self, element):
90 self.activity_counter += 1
91 parity = 'odd' if self.activity_counter % 2 == 1 else 'even'
92 if self.activity_counter == 1:
96 'activity_counter': self.activity_counter,
98 submill = EduModule(dict(self.options.items() + {'sub_gen': True}.items()))
100 if element.xpath('opis'):
101 opis = submill.generate(element.xpath('opis')[0])
105 n = element.xpath('wskazowki')
107 wskazowki = submill.generate(n[0])
111 n = element.xpath('pomoce')
113 pomoce = submill.generate(n[0])
117 forma = ''.join(element.xpath('forma/text()'))
118 get_forma_url = self.options['urlmapper'].get_forma_url
120 for form_name in forma.split(','):
121 name = form_name.strip()
122 url = get_forma_url(name)
124 forms.append("<a href='%s'>%s</a>" % (url, name))
127 forma = ', '.join(forms)
129 forma = '<tr class="infobox kind"><th>Metoda</th><td><p>%s</p></td></tr>' % forma
131 czas = ''.join(element.xpath('czas/text()'))
133 czas = '<tr class="infobox time"><th>Czas</th><td><p>%s min</p></td></tr>' % czas
135 counter = self.activity_counter
137 if element.getnext().tag == 'aktywnosc' or (len(self.activity_last) and self.activity_last.getnext() == element):
138 counter_html = """<span class="act_counter">%(counter)d.</span>""" % {'counter': counter}
142 self.activity_last = element
146 <div class="activity %(parity)s">
148 <div class="description">
150 %(opis)s""" % {'counter_html': counter_html, 'opis': opis, 'parity': parity},
159 <div class="clearboth"></div>
161 """ % {'wskazowki': wskazowki, 'czas': czas, 'forma': forma, 'pomoce': pomoce})
# <opis> and <wskazowki> are rendered as <div>s by handlers wrapped in
# ifoption(sub_gen=True).  handle_aktywnosc builds a sub-generator with the
# 'sub_gen' option set to True and uses it to render these elements;
# presumably the handlers are inactive without that option -- confirm
# against xmlutils.ifoption.
handle_opis = ifoption(sub_gen=True)(tag('div', 'desc'))
handle_wskazowki = ifoption(sub_gen=True)(tag('div', ('hints', 'teacher')))
@ifoption(sub_gen=True)
@tagged('tr', 'infobox materials')
def handle_pomoce(self, _):
    """Return the markup placed around the content of a <pomoce> element.

    The pair is a "Pomoce" header cell followed by an opening <td>, and the
    matching closing </td>.  The element itself is not inspected.
    """
    header_and_cell_open = """<th>Pomoce</th><td>"""
    cell_close = "</td>"
    return header_and_cell_open, cell_close
171 def handle_czas(self, *_):
174 def handle_forma(self, *_):
177 def handle_cwiczenie(self, element):
178 exercise_handlers = {
180 'uporzadkuj': Uporzadkuj,
183 'przyporzadkuj': Przyporzadkuj,
184 'prawdafalsz': PrawdaFalsz
187 typ = element.attrib['typ']
188 self.exercise_counter += 1
189 self.options = {'exercise_counter': self.exercise_counter}
190 handler = exercise_handlers[typ](self.options)
191 return handler.generate(element)
194 def handle_lista(self, element, attrs=None):
197 ltype = element.attrib.get('typ', 'punkt')
198 if not element.findall("punkt"):
199 if ltype == 'czytelnia':
200 return '<p>W przygotowaniu.</p>'
203 if ltype == 'slowniczek':
204 surl = element.attrib.get('src', None)
206 # print '** missing src on <slowniczek>, setting default'
207 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
208 sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
210 self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
211 return '<div class="slowniczek"><dl>', '</dl></div>'
217 'czytelnia': 'ul'}[ltype]
219 classes = attrs.get('class', '')
223 attrs_s = ' '.join(['%s="%s"' % kv for kv in attrs.items()])
225 attrs_s = ' ' + attrs_s
227 return '<%s class="lista %s %s"%s>' % (listtag, ltype, classes, attrs_s), '</%s>' % listtag
229 def handle_punkt(self, element):
230 if self.options['slowniczek']:
233 return '<li>', '</li>'
235 def handle_definiendum(self, element):
236 nxt = element.getnext()
240 print "!! Empty <definiendum/>"
243 # let's pull definiens from another document
244 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
245 sxml = self.options['slowniczek_xml']
246 if "'" in (element.text or ''):
247 defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
249 defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
251 definiens = defloc[0].getnext()
252 if definiens.tag == 'definiens':
253 subgen = EduModule(self.options)
254 definiens_s = subgen.generate(definiens)
256 print ("!! Missing definiendum in source: '%s'" % element.text).encode('utf-8')
258 return u"<dt id='%s'>" % self.naglowek_to_anchor(element), u"</dt>" + definiens_s
def handle_definiens(self, element):
    """Return the <dd> opening and closing tags; `element` is not read."""
    opening, closing = u"<dd>", u"</dd>"
    return opening, closing
def handle_podpis(self, element):
    """Return the opening and closing markup of a caption <div>.

    `element` is not inspected; the pair is constant.
    """
    opening = u"""<div class="caption">"""
    closing = u"</div>"
    return opening, closing
def handle_tabela(self, element):
    """Return the opening and closing <table> markup for `element`.

    The "ramki" attribute (default "0") is parsed as an integer; a non-zero
    value gives the table the "framed" class, otherwise the class is empty.
    """
    ramki = element.attrib.get("ramki", "0")
    if int(ramki):
        css_class = "framed"
    else:
        css_class = ""
    return u"""<table class="%s">""" % css_class, u"</table>"
def handle_wiersz(self, element):
    """Return the <tr> opening and closing tags; `element` is not read."""
    opening, closing = u"<tr>", u"</tr>"
    return opening, closing
def handle_kol(self, element):
    """Return the <td> opening and closing tags; `element` is not read."""
    opening, closing = u"<td>", u"</td>"
    return opening, closing
277 def handle_rdf__RDF(self, _):
278 # ustal w opcjach rzeczy :D
281 def handle_link(self, element):
282 if 'url' in element.attrib:
283 return tag('a', href=element.attrib['url'])(self, element)
284 elif 'material' in element.attrib:
285 material_err = u' [BRAKUJĄCY MATERIAŁ]'
286 slug = element.attrib['material']
289 return self.options['urlmapper'].url_for_material(slug, f)
291 formats = self.options['urlmapper'].materials(slug)
294 def_href = make_url(formats[0][0])
296 except (IndexError, self.options['urlmapper'].MaterialNotFound):
297 def_err = material_err
300 for f in formats[1:]:
302 fmt_links.append(u'<a href="%s">%s</a>' % (make_url(f[0]), f[0].upper()))
303 except self.options['urlmapper'].MaterialNotFound:
304 fmt_links.append(u'<a>%s%s</a>' % (f[0].upper(), material_err))
305 more_links = u' (%s)' % u', '.join(fmt_links) if fmt_links else u''
307 return u"<a href='%s'>" % def_href, u'%s</a>%s' % (def_err, more_links)
309 def handle_obraz(self, element):
310 format = self.options['urlmapper']
311 name = element.attrib.get('nazwa', '').strip()
313 print '!! <obraz> missing "nazwa"'
315 alt = element.attrib.get('alt', '')
317 print '** <obraz> missing "alt"'
318 slug, ext = name.rsplit('.', 1)
319 image = format.image(slug, ext)
320 name = image.name.rsplit('/', 1)[-1]
321 e = etree.Element("a", attrib={"class": "image"})
322 e.append(etree.Element("img", attrib={
325 "width": str(IMAGE_THUMB_WIDTH)}))
326 format.attachments[name] = self.options['media_root'] + image.name
327 return etree.tostring(e, encoding=unicode), u""
329 def handle_video(self, element):
330 url = element.attrib.get('url')
332 print '!! <video> missing url'
334 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
336 print '!! unknown <video> url scheme:', url
338 return """<iframe width="630" height="384" src="http://www.youtube.com/embed/%s"
339 frameborder="0" allowfullscreen></iframe>""" % m.group(1), ""
342 class Exercise(EduModule):
def __init__(self, *args, **kw):
    """Initialise the exercise's counters and flags.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    # Assigned before the parent constructor runs; handle_cwiczenie resets
    # it to 0 and handle_pytanie increments it.
    self.question_counter = 0
    super(Exercise, self).__init__(*args, **kw)
    # Checked and set to True by get_instruction().
    self.instruction_printed = False
    # Stays None until handle_cwiczenie / handle_pytanie set it to 0.
    self.piece_counter = None
@tagged('div', 'description')
def handle_opis(self, element):
    """Return an empty prefix and the exercise instruction as the suffix.

    The suffix is whatever self.get_instruction() returns at call time.
    """
    instruction = self.get_instruction()
    return "", instruction
def handle_rozw_kom(self, element):
    """Return the markup of a comment <div> that is hidden via inline style.

    `element` is not inspected; the pair is constant.
    """
    opening = u"""<div style="display:none" class="comment">"""
    closing = u"""</div>"""
    return opening, closing
358 def extra_attributes(self):
361 def handle_cwiczenie(self, element):
362 self.options = {'exercise': element.attrib['typ']}
363 self.question_counter = 0
364 self.piece_counter = 0
366 extra_attrs = self.extra_attributes()
369 <div class="exercise %(typ)s" data-type="%(typ)s"%(extra_attrs)s>
370 <h3>Zadanie %(exercies_counter)d</h3>
372 'exercies_counter': self.options['exercise_counter'],
373 'typ': element.attrib['typ'],
374 'extra_attrs': ' ' + ' '.join(
375 'data-%s="%s"' % item for item in extra_attrs.iteritems()) if extra_attrs else '',
380 # Add a single <pytanie> tag if it's not there
381 if not element.xpath(".//pytanie"):
382 qpre, qpost = self.handle_pytanie(element)
387 def handle_pytanie(self, element):
388 """This will handle <cwiczenie> element, when there is no <pytanie>
391 self.question_counter += 1
392 self.piece_counter = 0
393 solution = element.attrib.get('rozw', None)
394 solution_s = ' data-solution="%s"' % solution if solution else ''
396 handles = element.attrib.get('uchwyty', None)
398 add_class += ' handles handles-%s' % handles
399 self.options = {'handles': handles}
401 minimum = element.attrib.get('min', None)
402 minimum_s = ' data-minimum="%d"' % int(minimum) if minimum else ''
404 return '<div class="question%s" data-no="%d" %s>' %\
405 (add_class, self.question_counter, solution_s + minimum_s), \
408 def get_instruction(self):
409 if not self.instruction_printed:
410 self.instruction_printed = True
412 return u'<span class="instruction">%s</span>' % self.INSTRUCTION
419 class Wybor(Exercise):
def extra_attributes(self):
    """Return {'subtype': ...} describing the choice mode.

    The value is 'single' when the 'single' option is truthy and
    'multiple' otherwise.
    """
    if self.options['single']:
        subtype = 'single'
    else:
        subtype = 'multiple'
    return {'subtype': subtype}
423 def handle_cwiczenie(self, element):
424 is_single_choice = True
425 pytania = element.xpath(".//pytanie")
429 solutions = p.xpath(".//punkt[@rozw='prawda']")
430 if len(solutions) != 1:
431 is_single_choice = False
434 self.options = {'single': is_single_choice}
435 return super(Wybor, self).handle_cwiczenie(element)
437 def handle_punkt(self, element):
438 if self.options['exercise'] and element.attrib.get('rozw', None):
439 qc = self.question_counter
440 self.piece_counter += 1
441 no = self.piece_counter
442 eid = "q%(qc)d_%(no)d" % locals()
443 sol = element.attrib.get('rozw', None)
444 params = {'qc': qc, 'no': no, 'sol': sol, 'eid': eid}
445 if self.options['single']:
446 input_tag = u'<input type="radio" name="q%(qc)d" id="%(eid)s" value="%(eid)s" />'
448 input_tag = u'<input type="checkbox" name="%(eid)s" id="%(eid)s" />'
450 <li class="question-piece" data-qc="%(qc)d" data-no="%(no)d" data-sol="%(sol)s">
451 """ + input_tag + u"""
452 <label for="%(eid)s">""") % params, u"</label></li>"
454 return super(Wybor, self).handle_punkt(element)
457 class Uporzadkuj(Exercise):
458 INSTRUCTION = u"Kliknij wybraną odpowiedź i przeciągnij w nowe miejsce."
460 def handle_pytanie(self, element):
462 Overrides the returned content default handle_pytanie
464 # we ignore the result, returning our own
465 super(Uporzadkuj, self).handle_pytanie(element)
466 order_items = element.xpath(".//punkt/@rozw")
468 return u"""<div class="question" data-original="%s" data-no="%s">""" % \
469 (','.join(order_items), self.question_counter), \
472 def handle_punkt(self, element):
473 return """<li class="question-piece" data-pos="%(rozw)s">""" \
478 class Luki(Exercise):
479 INSTRUCTION = u"Przeciągnij odpowiedzi i upuść w wybranym polu."
def find_pieces(self, question):
    """Return the result of querying `question` for descendant <luka> nodes."""
    pieces = question.xpath(".//luka")
    return pieces
484 def solution_html(self, piece):
485 piece = deepcopy(piece)
488 return sub.generate(piece)
490 def handle_pytanie(self, element):
491 qpre, qpost = super(Luki, self).handle_pytanie(element)
493 luki = list(enumerate(self.find_pieces(element)))
496 for (i, luka) in luki:
498 luka_html = self.solution_html(luka)
499 luki_html += u'<span class="draggable question-piece" data-no="%d">%s</span>' % (i, luka_html)
500 self.words_html = '<div class="words">%s</div>' % luki_html
def handle_opis(self, element):
    """Return an empty prefix and self.words_html as the suffix.

    self.words_html is assigned in handle_pytanie; `element` is not read.
    """
    suffix = self.words_html
    return '', suffix
def handle_luka(self, element):
    """Advance the piece counter and return a placeholder <span>.

    The new counter value is written into the span's data-solution
    attribute.  A single string is returned, not an (open, close) pair.
    """
    self.piece_counter += 1
    number = self.piece_counter
    return '<span class="placeholder" data-solution="%d"></span>' % number
513 INSTRUCTION = u"Przeciągnij odpowiedzi i upuść je na słowie lub wyrażeniu, które chcesz zastąpić."
def find_pieces(self, question):
    """Return the result of querying `question` for descendant <zastap> nodes."""
    pieces = question.xpath(".//zastap")
    return pieces
def solution_html(self, piece):
    """Return the piece's 'rozw' attribute, or '' when it is absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def handle_zastap(self, element):
    """Advance the piece counter and return the wrapping <span> pair.

    The opening tag carries the new counter value in data-solution; the
    closing tag is a plain </span>.
    """
    self.piece_counter += 1
    number = self.piece_counter
    opening = '<span class="placeholder zastap question-piece" data-solution="%d">' % number
    return opening, '</span>'
527 class Przyporzadkuj(Exercise):
528 INSTRUCTION = [u"Przeciągnij odpowiedzi i upuść w wybranym polu.",
529 u"Kliknij numer odpowiedzi, przeciągnij i upuść w wybranym polu."]
531 def get_instruction(self):
532 if not self.instruction_printed:
533 self.instruction_printed = True
534 return u'<span class="instruction">%s</span>' % self.INSTRUCTION[self.options['handles'] and 1 or 0]
538 def handle_cwiczenie(self, element):
539 pre, post = super(Przyporzadkuj, self).handle_cwiczenie(element)
540 lista_with_handles = element.xpath(".//*[@uchwyty]")
541 if lista_with_handles:
542 self.options = {'handles': True}
545 def handle_pytanie(self, element):
546 pre, post = super(Przyporzadkuj, self).handle_pytanie(element)
547 minimum = element.attrib.get("min", None)
549 self.options = {"min": int(minimum)}
552 def handle_lista(self, lista):
553 if 'nazwa' in lista.attrib:
555 'data-name': lista.attrib['nazwa'],
558 self.options = {'predicate': True}
559 elif 'cel' in lista.attrib:
561 'data-target': lista.attrib['cel'],
564 if lista.attrib.get('krotkie'):
565 self.options = {'short': True}
566 self.options = {'subject': True}
569 pre, post = super(Przyporzadkuj, self).handle_lista(lista, attrs)
570 return pre, post + '<br class="clearboth"/>'
572 def handle_punkt(self, element):
573 if self.options['subject']:
574 self.piece_counter += 1
575 if self.options['handles']:
577 '<li><span data-solution="%s" data-no="%s" '
578 'class="question-piece draggable handle add-li">%s</span>' % (
579 element.attrib.get('rozw', ''),
585 if self.options['short']:
586 extra_class += ' short'
587 return '<li data-solution="%s" data-no="%s" class="question-piece draggable%s">' % (
588 element.attrib.get('rozw', ''),
589 self.piece_counter, extra_class), '</li>'
591 elif self.options['predicate']:
592 if self.options['min']:
593 placeholders = u'<li class="placeholder"></li>' * self.options['min']
595 placeholders = u'<li class="placeholder multiple"></li>'
597 '<li data-predicate="%s">' % element.attrib.get('nazwa', ''),
598 '<ul class="subjects">' + placeholders + '</ul></li>')
601 return super(Przyporzadkuj, self).handle_punkt(element)
604 class PrawdaFalsz(Exercise):
605 def handle_punkt(self, element):
606 if 'rozw' in element.attrib:
607 return u'''<li data-solution="%s" class="question-piece">
608 <span class="buttons">
609 <a data-value="true" class="true">Prawda</a>
610 <a data-value="false" class="false">Fałsz</a>
612 <span class="question-piece-text">''' % {
615 }[element.attrib['rozw']], '</span></li>'
617 return super(PrawdaFalsz, self).handle_punkt(element)
620 class EduModuleWeasyFormat(Format):
621 PRIMARY_MATERIAL_FORMATS = ('pdf', 'odt')
623 class MaterialNotFound(BaseException):
626 def __init__(self, wldoc, media_root='', save_html_to=None, **kwargs):
627 super(EduModuleWeasyFormat, self).__init__(wldoc, **kwargs)
628 self.media_root = media_root
629 self.materials_by_slug = None
630 self.attachments = {}
631 self.save_html_to = save_html_to
634 self.attachments = {}
636 'provider': self.wldoc.provider,
639 'media_root': self.media_root,
641 return edumod.generate(self.wldoc.edoc.getroot())
643 def get_weasy_dir(self):
644 html = self.get_html()
645 temp = mkdtemp('-weasy')
647 html_path = os.path.join(temp, 'doc.html')
648 with open(html_path, 'w') as fout:
649 fout.write(html.encode('utf-8'))
651 weasy_dir = os.path.join(os.path.dirname(__file__), 'weasy')
652 for filename in os.listdir(weasy_dir):
653 shutil.copy(get_resource('weasy/%s' % filename), temp)
654 for name, path in self.attachments.items():
655 shutil.copy(path, os.path.join(temp, name))
659 temp = self.get_weasy_dir()
660 if self.save_html_to:
661 save_path = os.path.join(self.save_html_to, 'weasy-html')
662 shutil.rmtree(save_path, ignore_errors=True)
663 shutil.copytree(temp, save_path)
664 html_path = os.path.join(temp, 'doc.html')
665 pdf_path = os.path.join(temp, 'doc.pdf')
672 WEASY_COMMAND = '/home/janek/Desktop/weasy-test/bin/weasyprint'
674 p = call([WEASY_COMMAND, html_path, pdf_path])
676 raise ParseError("Error parsing .html file: %s" % html_path)
681 output_file = NamedTemporaryFile(prefix='librarian', suffix='.pdf', delete=False)
682 shutil.move(pdf_path, output_file.name)
683 # shutil.rmtree(temp)
684 return IOFile.from_filename(output_file.name)
687 # Sort materials by slug.
688 self.materials_by_slug = {}
689 for name, att in self.wldoc.source.attachments.items():
690 parts = name.rsplit('.', 1)
694 if slug not in self.materials_by_slug:
695 self.materials_by_slug[slug] = {}
696 self.materials_by_slug[slug][ext] = att
697 return self.get_pdf()
699 def materials(self, slug):
700 """Returns a list of pairs: (ext, iofile)."""
701 order = {pmf: i for (i, pmf) in enumerate(self.PRIMARY_MATERIAL_FORMATS)}
702 mats = self.materials_by_slug.get(slug, {}).items()
704 print ("!! Material missing: '%s'" % slug).encode('utf-8')
705 return sorted(mats, key=lambda (x, y): order.get(x, x))
def url_for_material(self, slug, fmt):
    """Return "<slug>.<fmt>", i.e. the material's name with its extension."""
    parts = (slug, fmt)
    return "%s.%s" % parts
def url_for_image(self, slug, fmt, width=None):
    """Return the URL of an image; identical to url_for_material.

    `width` is accepted but has no effect on the result.
    """
    url = self.url_for_material(slug, fmt)
    return url
def text_to_anchor(self, text):
    """Return `text` with every run of spaces collapsed to a single space.

    Only the space character is affected; other whitespace is left as is.
    """
    space_run = re.compile(r" +")
    return space_run.sub(" ", text)
def naglowek_to_anchor(self, naglowek):
    """Return the anchor text for the header element `naglowek`.

    The element's text is stripped of surrounding whitespace and passed
    through self.text_to_anchor.  An element without text yields the anchor
    of the empty string.
    """
    # naglowek.text is None for an element with no leading text; fall back
    # to '' (as handle_definiendum does) instead of crashing on None.strip().
    label = (naglowek.text or '').strip()
    return self.text_to_anchor(label)
719 def get_forma_url(self, forma):
722 def get_help_url(self, naglowek):
def transform(wldoc, stylesheet='edumed', options=None, flags=None, verbose=None):
    """Build `wldoc` with EduModuleWeasyFormat and return the build result.

    Only `wldoc` is used: it is passed to the EduModuleWeasyFormat
    constructor, and the value returned by that object's build() is handed
    back to the caller.

    `stylesheet`, `options`, `flags` and `verbose` are accepted but are not
    read anywhere in this function.

    NOTE(review): the previous docstring described an `output_filename`
    parameter and an XHTML/XML result; this function has no such parameter.
    build() appears to return the generated PDF as an IOFile -- confirm.
    """
    edumodfor = EduModuleWeasyFormat(wldoc)
    return edumodfor.build()