1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from copy import deepcopy
17 from urllib2 import urlopen
19 from lxml import etree
21 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
22 from librarian.dcparser import Person
23 from librarian import DCNS, get_resource, IOFile
24 from librarian import functions
25 from pdf import PDFFormat, substitute_hyphens, fix_hanging
30 def _wrap(*args, **kw):
31 value = f(*args, **kw)
33 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
35 if isinstance(value, list):
36 import pdb; pdb.set_trace()
37 if isinstance(value, tuple):
38 return prefix + value[0], value[1] + postfix
40 return prefix + value + postfix
45 def cmd(name, parms=None):
46 def wrap(self, element=None):
47 pre, post = tag_open_close('cmd', name=name)
51 e = etree.Element("parm")
53 pre += etree.tostring(e)
54 if element is not None:
56 post = "</parm>" + post
63 def mark_alien_characters(text):
64 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
68 class EduModule(Xmill):
69 def __init__(self, options=None, state=None):
70 super(EduModule, self).__init__(options, state)
71 self.activity_counter = 0
72 self.exercise_counter = 0
74 def swap_endlines(txt):
75 if self.options['strofa']:
76 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
78 self.register_text_filter(swap_endlines)
79 self.register_text_filter(functions.substitute_entities)
80 self.register_text_filter(mark_alien_characters)
82 def get_dc(self, element, dc_field, single=False):
83 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
88 def handle_rdf__RDF(self, _):
89 "skip metadata in generation"
93 def get_rightsinfo(self, element):
94 rights_lic = self.get_dc(element, 'rights.license', True)
95 return u'<cmd name="rightsinfostr">' + \
96 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
97 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
101 def get_authors(self, element, which=None):
102 dc = self.options['wldoc'].book_info
104 authors = dc.authors_textbook + \
105 dc.authors_scenario + \
108 authors = getattr(dc, "authors_%s" % which)
109 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the title read from the ``dc:title`` metadata of *element*.

    The lookup is delegated to ``get_dc`` with its ``single`` flag set.
    """
    title = self.get_dc(element, 'title', True)
    return title
115 def handle_utwor(self, element):
118 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
120 \\documentclass[%s]{wl}
121 \\usepackage{style}''' % self.options['customization_str'],
122 self.options['has_cover'] and '\usepackage{makecover}',
123 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
124 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
125 (self.options['morefloats'] == 'none' and
126 u'''\\IfFileExists{morefloats.sty}{
127 \\usepackage{morefloats}
129 u'''\\def\\authors{%s}''' % self.get_authors(element),
130 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
131 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
132 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
134 u'''\\author{\\authors}''',
135 u'''\\title{%s}''' % self.get_title(element),
136 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
137 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
140 return u"".join(filter(None, lines)), u'</TeXML>'
144 def handle_powiesc(self, element):
146 <env name="document">
147 <cmd name="maketitle"/>
148 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap *element* in a TeXML command derived from its tag.

    The command name is whatever ``functions.texcommand`` returns for
    ``element.tag``.  Returns an ``(opening, closing)`` pair of markup
    strings that enclose the element's rendered content.
    """
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
159 handle_akap_dialog = \
160 handle_akap_dialog = \
161 handle_autor_utworu = \
163 handle_didaskalia = \
164 handle_didask_tekst = \
165 handle_dlugi_cytat = \
166 handle_dzielo_nadrzedne = \
167 handle_lista_osoba = \
169 handle_miejsce_czas = \
171 handle_motto_podpis = \
172 handle_naglowek_akt = \
173 handle_naglowek_czesc = \
174 handle_naglowek_listy = \
175 handle_naglowek_osoba = \
176 handle_naglowek_podrozdzial = \
177 handle_naglowek_scena = \
178 handle_nazwa_utworu = \
184 handle_poezja_cyt = \
187 handle_sekcja_asterysk = \
188 handle_sekcja_swiatlo = \
189 handle_separator_linia = \
190 handle_slowo_obce = \
192 handle_tytul_dziela = \
193 handle_wyroznienie = \
197 def handle_naglowek_rozdzial(self, element):
198 if not self.options['teacher']:
200 if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek')):
202 self.state['mute'] = False
205 self.state['mute'] = True
207 return self.handle_texcommand(element)
208 handle_naglowek_rozdzial.unmuter = True
211 def handle_uwaga(self, _e):
213 def handle_extra(self, _e):
def handle_nbsp(self, _e):
    """Emit a TeXML ``spec`` element of category ``tilde`` for an nbsp tag."""
    tilde_spec = '<spec cat="tilde" />'
    return tilde_spec
219 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Flag the ``strofa`` option and render *element* via ``_handle_strofa``.

    The option is assigned before rendering so that it is already set when
    ``_handle_strofa`` produces its result.
    """
    stanza_options = {'strofa': True}
    self.options = stanza_options
    render = self._handle_strofa
    return render(element)
225 def handle_aktywnosc(self, element):
226 self.activity_counter += 1
229 'activity_counter': self.activity_counter,
232 submill = EduModule(self.options, self.state)
234 if element.xpath('opis'):
235 opis = submill.generate(element.xpath('opis')[0])
239 n = element.xpath('wskazowki')
240 if n: wskazowki = submill.generate(n[0])
243 n = element.xpath('pomoce')
245 if n: pomoce = submill.generate(n[0])
248 forma = ''.join(element.xpath('forma/text()'))
250 czas = ''.join(element.xpath('czas/text()'))
252 counter = self.activity_counter
255 <cmd name="noindent" />
256 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
257 <cmd name="activityinfo"><parm>
258 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
259 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
260 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
269 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
270 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Prefix the rendered content with the "Pomoce: " label.

    NOTE(review): the ``ifoption(sub_gen=True)`` decorator presumably limits
    this handler to sub-generation runs -- confirm against ``xmlutils``.
    """
    label, closing = "Pomoce: ", ""
    return label, closing
276 def handle_czas(self, *_):
279 def handle_forma(self, *_):
282 def handle_lista(self, element, attrs={}):
283 ltype = element.attrib.get('typ', 'punkt')
284 if not element.findall("punkt"):
285 if ltype == 'czytelnia':
286 return 'W przygotowaniu.'
289 if ltype == 'slowniczek':
290 surl = element.attrib.get('src', None)
292 # print '** missing src on <slowniczek>, setting default'
293 surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
296 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
297 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
299 listcmd = {'num': 'enumerate',
302 'slowniczek': 'itemize',
303 'czytelnia': 'itemize'}[ltype]
305 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Open a list entry with an ``item`` command; nothing follows the content."""
    item_cmd = '<cmd name="item"/>'
    return item_cmd, ''
310 def handle_cwiczenie(self, element):
311 exercise_handlers = {
313 'uporzadkuj': Uporzadkuj,
316 'przyporzadkuj': Przyporzadkuj,
317 'prawdafalsz': PrawdaFalsz
320 typ = element.attrib['typ']
321 self.exercise_counter += 1
322 if not typ in exercise_handlers:
323 return '(no handler)'
324 self.options = {'exercise_counter': self.exercise_counter}
325 handler = exercise_handlers[typ](self.options, self.state)
326 return handler.generate(element)
328 # XXX this is copied from pyhtml.py, except for return and
329 # should be refactored for no code duplication
330 def handle_definiendum(self, element):
331 nxt = element.getnext()
334 # let's pull definiens from another document
335 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
336 sxml = self.options['slowniczek_xml']
337 assert element.text != ''
338 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
340 definiens = defloc[0].getnext()
341 if definiens.tag == 'definiens':
342 subgen = EduModule(self.options, self.state)
343 definiens_s = subgen.generate(definiens)
345 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
347 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Enclose the element's content in a ``figure`` environment."""
    env_open = u'<env name="figure">'
    env_close = u"</env>"
    return env_open, env_close
353 def handle_tabela(self, element):
355 for w in element.xpath("wiersz"):
357 if max_col < len(ks):
359 self.options = {'columnts': max_col}
361 # has_frames = int(element.attrib.get("ramki", "0"))
362 # if has_frames: frames_c = "framed"
363 # else: frames_c = ""
364 # return u"""<table class="%s">""" % frames_c, u"</table>"
366 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
367 ''' % ('l' * max_col), \
368 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Terminate a table row with a ``ctrl`` element carrying a backslash."""
    row_end = u'<ctrl ch="\\"/>'
    return u"", row_end
375 def handle_kol(self, element):
376 if element.getnext() is not None:
377 return u"", u'<spec cat="align" />'
380 def handle_link(self, element):
381 if element.attrib.get('url'):
382 url = element.attrib.get('url')
383 if url == element.text:
384 return cmd('url')(self, element)
386 return cmd('href', parms=[element.attrib['url']])(self, element)
388 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Register the referenced image as an attachment and emit ``obraz``.

    The image is looked up by the whitespace-stripped ``nazwa`` attribute
    through the output format object found in ``self.options['format']``.
    It is stored in that object's ``attachments`` under an ``obraz/`` path
    with underscores removed from the name, and the same path is passed as
    the parameter of the generated ``obraz`` command.
    """
    pdf_format = self.options['format']
    name = element.attrib.get('nazwa', '').strip()
    picture = pdf_format.get_image(name)
    # NOTE(review): underscores are dropped from the stored path, presumably
    # to keep it TeX-friendly -- confirm.
    img_path = "obraz/%s" % name.replace("_", "")
    pdf_format.attachments[img_path] = picture
    emit = cmd("obraz", parms=[img_path])
    return emit(self)
398 def handle_video(self, element):
399 url = element.attrib.get('url')
401 print '!! <video> missing url'
403 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
405 print '!! unknown <video> url scheme:', url
408 thumb = IOFile.from_string(urlopen
409 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
410 img_path = "video/%s.jpg" % name.replace("_", "")
411 self.options['format'].attachments[img_path] = thumb
412 canon_url = "https://www.youtube.com/watch?v=%s" % name
413 return cmd("video", parms=[img_path, canon_url])(self)
416 class Exercise(EduModule):
417 def __init__(self, *args, **kw):
418 self.question_counter = 0
419 super(Exercise, self).__init__(*args, **kw)
421 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
423 def handle_cwiczenie(self, element):
425 'exercise': element.attrib['typ'],
428 self.question_counter = 0
429 self.piece_counter = 0
431 header = etree.Element("parm")
432 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
433 header_cmd.append(header)
434 header.text = u"Zadanie %d." % self.options['exercise_counter']
436 pre = etree.tostring(header_cmd, encoding=unicode)
438 # Add a single <pytanie> tag if it's not there
439 if not element.xpath(".//pytanie"):
440 qpre, qpost = self.handle_pytanie(element)
445 def handle_pytanie(self, element):
446 """This will handle <cwiczenie> element, when there is no <pytanie>
448 self.question_counter += 1
449 self.piece_counter = 0
451 if self.options['teacher'] and element.attrib.get('rozw'):
452 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
455 def handle_punkt(self, element):
456 pre, post = super(Exercise, self).handle_punkt(element)
457 if self.options['teacher'] and element.attrib.get('rozw'):
458 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
461 def solution_header(self):
462 par = etree.Element("cmd", name="par")
463 parm = etree.Element("parm")
464 parm.text = u"Rozwiązanie:"
466 return etree.tostring(par)
468 def explicit_solution(self):
469 if self.options['solution']:
470 par = etree.Element("cmd", name="par")
471 parm = etree.Element("parm")
472 parm.text = self.options['solution']
474 return self.solution_header() + etree.tostring(par)
478 class Wybor(Exercise):
479 def handle_cwiczenie(self, element):
480 pre, post = super(Wybor, self).handle_cwiczenie(element)
481 is_single_choice = True
482 pytania = element.xpath(".//pytanie")
486 solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
487 if len(solutions) != 1:
488 is_single_choice = False
490 choices = p.xpath(".//*[@nazwa]")
492 for n in choices: uniq.add(n.attrib.get('nazwa', ''))
493 if len(choices) != len(uniq):
494 is_single_choice = False
497 self.options = {'single': is_single_choice}
500 def handle_punkt(self, element):
501 if self.options['exercise'] and element.attrib.get('nazwa', None):
502 cmd = 'radio' if self.options['single'] else 'checkbox'
503 return u'<cmd name="%s"/>' % cmd, ''
505 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Handler for exercises of type ``uporzadkuj``."""

    def handle_pytanie(self, element):
        """Render the question exactly as the base ``Exercise`` does.

        No ordering-specific markup is produced here; the call is passed
        straight to the parent implementation and its result returned.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
514 class Przyporzadkuj(Exercise):
515 def handle_lista(self, lista):
516 header = etree.Element("parm")
517 header_cmd = etree.Element("cmd", name="par")
518 header_cmd.append(header)
519 if 'nazwa' in lista.attrib:
520 header.text = u"Kategorie:"
521 elif 'cel' in lista.attrib:
522 header.text = u"Elementy do przyporządkowania:"
524 header.text = u"Lista:"
525 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
526 pre = etree.tostring(header_cmd, encoding=unicode) + pre
530 class Luki(Exercise):
def find_pieces(self, question):
    """Return every ``luka`` descendant of *question*."""
    gap_query = ".//luka"
    return question.xpath(gap_query)
534 def solution(self, piece):
535 piece = deepcopy(piece)
538 return sub.generate(piece)
540 def handle_pytanie(self, element):
541 qpre, qpost = super(Luki, self).handle_pytanie(element)
543 luki = self.find_pieces(element)
545 self.words = u"<env name='itemize'>%s</env>" % (
546 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
def handle_opis(self, element):
    """Append the prepared ``self.words`` markup after the description."""
    trailing = self.words
    return '', trailing
553 def handle_luka(self, element):
555 if self.options['teacher']:
556 piece = deepcopy(element)
559 text = sub.generate(piece)
560 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return every ``zastap`` descendant of *question*."""
    replace_query = ".//zastap"
    return question.xpath(replace_query)
def solution(self, piece):
    """Return the ``rozw`` attribute of *piece*, or ``''`` when it is absent."""
    attributes = piece.attrib
    return attributes.get('rozw', '')
def list_header(self):
    """Return the heading text for the list of items to insert."""
    heading = u"Elementy do wstawienia"
    return heading
574 def handle_zastap(self, element):
575 piece = deepcopy(element)
578 text = sub.generate(piece)
579 if self.options['teacher'] and element.attrib.get('rozw'):
580 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
584 class PrawdaFalsz(Exercise):
585 def handle_punkt(self, element):
586 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
587 if 'rozw' in element.attrib:
588 post += u" [Prawda/Fałsz]"
594 lists = tree.xpath(".//lista")
599 if p.tail is None: p.tail = ''
603 if p.text is None: p.text = ''
609 class EduModulePDFFormat(PDFFormat):
610 style = get_resource('res/styles/edumed/pdf/edumed.sty')
613 substitute_hyphens(self.wldoc.edoc)
614 fix_hanging(self.wldoc.edoc)
616 self.attachments = {}
620 "teacher": self.customization.get('teacher'),
622 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
624 open("/tmp/texml.xml", "w").write(texml)
627 def get_tex_dir(self):
628 temp = super(EduModulePDFFormat, self).get_tex_dir()
629 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
630 for name, iofile in self.attachments.items():
631 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Return the source document's attachment stored under *name*.

    Plain indexing is used, so an unknown name propagates whatever lookup
    error the attachments container raises.
    """
    attachments = self.wldoc.source.attachments
    return attachments[name]