1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from __future__ import with_statement
13 from copy import deepcopy
17 from StringIO import StringIO
18 from tempfile import mkdtemp, NamedTemporaryFile
21 from copy import deepcopy
22 from subprocess import call, PIPE
23 from urllib2 import urlopen
25 from Texml.processor import process
26 from lxml import etree
27 from lxml.etree import XMLSyntaxError, XSLTApplyError
29 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
30 from librarian.dcparser import Person
31 from librarian.parser import WLDocument
32 from librarian import ParseError, DCNS, get_resource, IOFile, Format
33 from librarian import functions
34 from pdf import PDFFormat
40 def _wrap(*args, **kw):
41 value = f(*args, **kw)
43 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
45 if isinstance(value, list):
46 import pdb; pdb.set_trace()
47 if isinstance(value, tuple):
48 return prefix + value[0], value[1] + postfix
50 return prefix + value + postfix
55 def cmd(name, parms=None):
56 def wrap(self, element=None):
57 pre, post = tag_open_close('cmd', name=name)
61 e = etree.Element("parm")
63 pre += etree.tostring(e)
64 if element is not None:
66 post = "</parm>" + post
73 def mark_alien_characters(text):
74 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
78 class EduModule(Xmill):
79 def __init__(self, options=None):
80 super(EduModule, self).__init__(options)
81 self.activity_counter = 0
82 self.exercise_counter = 0
84 def swap_endlines(txt):
85 if self.options['strofa']:
86 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
88 self.register_text_filter(functions.substitute_entities)
89 self.register_text_filter(mark_alien_characters)
90 self.register_text_filter(swap_endlines)
92 def get_dc(self, element, dc_field, single=False):
93 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
98 def handle_rdf__RDF(self, _):
99 "skip metadata in generation"
103 def get_rightsinfo(self, element):
104 rights_lic = self.get_dc(element, 'rights.license', True)
105 return u'<cmd name="rightsinfostr">' + \
106 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
107 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
111 def get_authors(self, element, which=None):
112 dc = self.options['wldoc'].book_info
114 authors = dc.authors_textbook + \
115 dc.authors_scenario + \
118 authors = getattr(dc, "authors_%s" % which)
119 return u', '.join(author.readable() for author in authors)
def get_title(self, element):
    """Return the document title taken from the Dublin Core metadata.

    Delegates to ``get_dc`` for the ``title`` field, asking for a single
    value rather than a list.
    """
    title = self.get_dc(element, 'title', single=True)
    return title
125 def handle_utwor(self, element):
128 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
130 \\documentclass[%s]{wl}
131 \\usepackage{style}''' % self.options['customization_str'],
132 self.options['has_cover'] and '\usepackage{makecover}',
133 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
134 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
135 (self.options['morefloats'] == 'none' and
136 u'''\\IfFileExists{morefloats.sty}{
137 \\usepackage{morefloats}
139 u'''\\def\\authors{%s}''' % self.get_authors(element),
140 u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
141 u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
142 u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
144 u'''\\author{\\authors}''',
145 u'''\\title{%s}''' % self.get_title(element),
146 u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
147 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
150 return u"".join(filter(None, lines)), u'</TeXML>'
154 def handle_powiesc(self, element):
156 <env name="document">
157 <cmd name="maketitle"/>
158 """, """<cmd name="editorialsection" /></env>"""
def handle_texcommand(self, element):
    """Wrap the element in a TeXML command derived from its tag.

    The command name comes from ``functions.texcommand(element.tag)``.
    Returns the (opening, closing) markup pair that surrounds the
    element's content.
    """
    # Named so it does not shadow the module-level ``cmd`` factory.
    command_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % command_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
169 handle_akap_dialog = \
170 handle_akap_dialog = \
171 handle_autor_utworu = \
173 handle_didaskalia = \
174 handle_didask_tekst = \
175 handle_dlugi_cytat = \
176 handle_dzielo_nadrzedne = \
177 handle_lista_osoba = \
179 handle_miejsce_czas = \
181 handle_motto_podpis = \
182 handle_naglowek_akt = \
183 handle_naglowek_czesc = \
184 handle_naglowek_listy = \
185 handle_naglowek_osoba = \
186 handle_naglowek_podrozdzial = \
187 handle_naglowek_podrozdzial = \
188 handle_naglowek_rozdzial = \
189 handle_naglowek_rozdzial = \
190 handle_naglowek_scena = \
191 handle_nazwa_utworu = \
197 handle_poezja_cyt = \
200 handle_sekcja_asterysk = \
201 handle_sekcja_swiatlo = \
202 handle_separator_linia = \
203 handle_slowo_obce = \
205 handle_tytul_dziela = \
206 handle_wyroznienie = \
209 _handle_strofa = cmd("strofa")
def handle_strofa(self, element):
    """Render a stanza with the ``strofa`` option switched on.

    ``strofa`` is the option the ``swap_endlines`` text filter checks
    before rewriting verse line endings; the markup itself is produced
    by the generic ``strofa`` command handler.
    """
    # NOTE(review): assigning a dict to self.options presumably relies on
    # Xmill's option handling (not visible here) -- confirm it layers the
    # new values instead of replacing the existing ones.
    stanza_options = {'strofa': True}
    self.options = stanza_options
    render = self._handle_strofa
    return render(element)
215 def handle_aktywnosc(self, element):
216 self.activity_counter += 1
219 'activity_counter': self.activity_counter,
222 submill = EduModule(self.options)
224 opis = submill.generate(element.xpath('opis')[0])
226 n = element.xpath('wskazowki')
227 if n: wskazowki = submill.generate(n[0])
230 n = element.xpath('pomoce')
232 if n: pomoce = submill.generate(n[0])
235 forma = ''.join(element.xpath('forma/text()'))
237 czas = ''.join(element.xpath('czas/text()'))
239 counter = self.activity_counter
242 <cmd name="noindent" />
243 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
244 <cmd name="activityinfo"><parm>
245 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
246 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
247 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
256 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
257 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Return the "Pomoce: " label as opening markup, with no closing markup.

    The handler is guarded by ``ifoption(sub_gen=True)``.
    """
    label = "Pomoce: "
    return (label, "")
263 def handle_czas(self, *_):
266 def handle_forma(self, *_):
269 def handle_lista(self, element, attrs={}):
270 if not element.findall("punkt"):
272 ltype = element.attrib.get('typ', 'punkt')
273 if ltype == 'slowniczek':
274 surl = element.attrib.get('src', None)
276 # print '** missing src on <slowniczek>, setting default'
277 surl = 'http://edukacjamedialna.edu.pl/slowniczek'
280 sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
281 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
283 listcmd = {'num': 'enumerate',
286 'slowniczek': 'itemize',
287 'czytelnia': 'itemize'}[ltype]
289 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Open a list item with an ``item`` command; nothing follows it."""
    item_markup = '<cmd name="item"/>'
    return (item_markup, '')
294 def handle_cwiczenie(self, element):
295 exercise_handlers = {
297 'uporzadkuj': Uporzadkuj,
300 'przyporzadkuj': Przyporzadkuj,
301 'prawdafalsz': PrawdaFalsz
304 typ = element.attrib['typ']
305 self.exercise_counter += 1
306 if not typ in exercise_handlers:
307 return '(no handler)'
308 self.options = {'exercise_counter': self.exercise_counter}
309 handler = exercise_handlers[typ](self.options)
310 return handler.generate(element)
# XXX: this is copied from pyhtml.py, except for the return statement, and
# should be refactored to remove the code duplication.
314 def handle_definiendum(self, element):
315 nxt = element.getnext()
318 # let's pull definiens from another document
319 if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
320 sxml = self.options['slowniczek_xml']
321 assert element.text != ''
322 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
324 definiens = defloc[0].getnext()
325 if definiens.tag == 'definiens':
326 subgen = EduModule(self.options)
327 definiens_s = subgen.generate(definiens)
329 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
331 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element's content in a ``figure`` environment."""
    env_open = u'<env name="figure">'
    env_close = u'</env>'
    return env_open, env_close
337 def handle_tabela(self, element):
339 for w in element.xpath("wiersz"):
341 if max_col < len(ks):
343 self.options = {'columnts': max_col}
345 # has_frames = int(element.attrib.get("ramki", "0"))
346 # if has_frames: frames_c = "framed"
347 # else: frames_c = ""
348 # return u"""<table class="%s">""" % frames_c, u"</table>"
350 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
351 ''' % ('l' * max_col), \
352 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """Close a table row: no opening markup, a ``ctrl`` element after it."""
    row_end = u'<ctrl ch="\\"/>'
    return (u"", row_end)
359 def handle_kol(self, element):
360 if element.getnext() is not None:
361 return u"", u'<spec cat="align" />'
364 def handle_link(self, element):
365 if element.attrib.get('url'):
366 url = element.attrib.get('url')
367 if url == element.text:
368 return cmd('url')(self, element)
370 return cmd('href', parms=[element.attrib['url']])(self, element)
372 return cmd('emph')(self, element)
def handle_obraz(self, element):
    """Register the named image as an attachment and emit an ``obraz`` command.

    The image is fetched from the output format by the whitespace-stripped
    ``nazwa`` attribute and stored in the format's ``attachments`` under
    ``obraz/<name with underscores removed>``; that path becomes the
    command's single parameter.

    Raises KeyError when the element has no ``nazwa`` attribute.
    """
    output_format = self.options['format']
    image_name = element.attrib['nazwa'].strip()
    picture = output_format.get_image(image_name)
    # Underscores are dropped from the attachment path.
    attachment_path = "obraz/%s" % image_name.replace("_", "")
    output_format.attachments[attachment_path] = picture
    emit = cmd("obraz", parms=[attachment_path])
    return emit(self)
382 def handle_video(self, element):
383 url = element.attrib.get('url')
385 print '!! <video> missing url'
387 m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
389 print '!! unknown <video> url scheme:', url
392 thumb = IOFile.from_string(urlopen
393 ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
394 img_path = "video/%s.jpg" % name.replace("_", "")
395 self.options['format'].attachments[img_path] = thumb
396 canon_url = "https://www.youtube.com/watch?v=%s" % name
397 return cmd("video", parms=[img_path, canon_url])(self)
400 class Exercise(EduModule):
401 def __init__(self, *args, **kw):
402 self.question_counter = 0
403 super(Exercise, self).__init__(*args, **kw)
405 handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
407 def handle_cwiczenie(self, element):
409 'exercise': element.attrib['typ'],
412 self.question_counter = 0
413 self.piece_counter = 0
415 header = etree.Element("parm")
416 header_cmd = etree.Element("cmd", name="naglowekpodrozdzial")
417 header_cmd.append(header)
418 header.text = u"Zadanie %d." % self.options['exercise_counter']
420 pre = etree.tostring(header_cmd, encoding=unicode)
422 # Add a single <pytanie> tag if it's not there
423 if not element.xpath(".//pytanie"):
424 qpre, qpost = self.handle_pytanie(element)
429 def handle_pytanie(self, element):
430 """This will handle <cwiczenie> element, when there is no <pytanie>
432 self.question_counter += 1
433 self.piece_counter = 0
435 if self.options['teacher'] and element.attrib.get('rozw'):
436 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
439 def handle_punkt(self, element):
440 pre, post = super(Exercise, self).handle_punkt(element)
441 if self.options['teacher'] and element.attrib.get('rozw'):
442 post += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
445 def solution_header(self):
446 par = etree.Element("cmd", name="par")
447 parm = etree.Element("parm")
448 parm.text = u"Rozwiązanie:"
450 return etree.tostring(par)
452 def explicit_solution(self):
453 if self.options['solution']:
454 par = etree.Element("cmd", name="par")
455 parm = etree.Element("parm")
456 parm.text = self.options['solution']
458 return self.solution_header() + etree.tostring(par)
462 class Wybor(Exercise):
463 def handle_cwiczenie(self, element):
464 pre, post = super(Wybor, self).handle_cwiczenie(element)
465 is_single_choice = True
466 pytania = element.xpath(".//pytanie")
470 solutions = re.split(r"[, ]+", p.attrib['rozw'])
471 if len(solutions) != 1:
472 is_single_choice = False
474 choices = p.xpath(".//*[@nazwa]")
476 for n in choices: uniq.add(n.attrib['nazwa'])
477 if len(choices) != len(uniq):
478 is_single_choice = False
481 self.options = {'single': is_single_choice}
484 def handle_punkt(self, element):
485 if self.options['exercise'] and element.attrib.get('nazwa', None):
486 cmd = 'radio' if self.options['single'] else 'checkbox'
487 return u'<cmd name="%s"/>' % cmd, ''
489 return super(Wybor, self).handle_punkt(element)
class Uporzadkuj(Exercise):
    """Exercise handler registered under the ``uporzadkuj`` type.

    It currently adds nothing on top of the generic ``Exercise`` rendering.
    """

    def handle_pytanie(self, element):
        """Handle a question by deferring to ``Exercise.handle_pytanie``.

        The previous version also collected ``.//punkt/@rozw`` into a local
        that was never read; that dead query has been removed.
        """
        return super(Uporzadkuj, self).handle_pytanie(element)
498 class Przyporzadkuj(Exercise):
499 def handle_lista(self, lista):
500 header = etree.Element("parm")
501 header_cmd = etree.Element("cmd", name="par")
502 header_cmd.append(header)
503 if 'nazwa' in lista.attrib:
504 header.text = u"Kategorie:"
505 elif 'cel' in lista.attrib:
506 header.text = u"Elementy do przyporządkowania:"
508 header.text = u"Lista:"
509 pre, post = super(Przyporzadkuj, self).handle_lista(lista)
510 pre = etree.tostring(header_cmd, encoding=unicode) + pre
514 class Luki(Exercise):
def find_pieces(self, question):
    """Return the result of querying ``question`` for all descendant ``luka`` elements."""
    gaps = question.xpath(".//luka")
    return gaps
518 def solution(self, piece):
519 piece = deepcopy(piece)
522 return sub.generate(piece)
524 def handle_pytanie(self, element):
525 qpre, qpost = super(Luki, self).handle_pytanie(element)
527 luki = self.find_pieces(element)
529 self.words = u"<env name='itemize'>%s</env>" % (
530 "".join("<cmd name='item'/>%s" % self.solution(luka) for luka in luki)
534 def handle_opis(self, element):
535 return '', self.words
537 def handle_luka(self, element):
539 if self.options['teacher']:
540 piece = deepcopy(element)
543 text = sub.generate(piece)
544 luka += u" [rozwiązanie: %s]" % text
def find_pieces(self, question):
    """Return the result of querying ``question`` for all descendant ``zastap`` elements."""
    replacements = question.xpath(".//zastap")
    return replacements
def solution(self, piece):
    """Return the value of the piece's ``rozw`` attribute.

    Raises KeyError when the attribute is absent.
    """
    attributes = piece.attrib
    return attributes['rozw']
def list_header(self):
    """Return the fixed header text for the list of items to insert."""
    header = u"Elementy do wstawienia"
    return header
558 def handle_zastap(self, element):
559 piece = deepcopy(element)
562 text = sub.generate(piece)
563 if self.options['teacher'] and element.attrib.get('rozw'):
564 text += u" [rozwiązanie: %s]" % element.attrib.get('rozw')
568 class PrawdaFalsz(Exercise):
569 def handle_punkt(self, element):
570 pre, post = super(PrawdaFalsz, self).handle_punkt(element)
571 if 'rozw' in element.attrib:
572 post += u" [Prawda/Fałsz]"
578 lists = tree.xpath(".//lista")
583 if p.tail is None: p.tail = ''
587 if p.text is None: p.text = ''
593 class EduModulePDFFormat(PDFFormat):
594 style = get_resource('res/styles/edumed/pdf/edumed.sty')
597 self.attachments = {}
601 "teacher": self.customization.get('teacher'),
603 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
605 open("/tmp/texml.xml", "w").write(texml)
608 def get_tex_dir(self):
609 temp = super(EduModulePDFFormat, self).get_tex_dir()
610 shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
611 for name, iofile in self.attachments.items():
612 iofile.save_as(os.path.join(temp, name))
def get_image(self, name):
    """Look up ``name`` among the attachments of the document's source."""
    attachments = self.wldoc.source.attachments
    return attachments[name]