1 # -*- coding: utf-8 -*-
3 # This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 """PDF creation library.
8 Creates one big XML from the book and its children, converts it to LaTeX
9 with TeXML, then runs it by XeLaTeX.
12 from __future__ import with_statement
16 from StringIO import StringIO
17 from tempfile import mkdtemp, NamedTemporaryFile
19 from copy import deepcopy
20 from subprocess import call, PIPE
22 from Texml.processor import process
23 from lxml import etree
24 from lxml.etree import XMLSyntaxError, XSLTApplyError
26 from xmlutils import Xmill, tag, tagged, ifoption
27 from librarian.dcparser import Person
28 from librarian.parser import WLDocument
29 from librarian import ParseError, DCNS, get_resource, IOFile, Format
30 from librarian import functions
31 from pdf import PDFFormat
# Inner wrapper: calls the wrapped handler `f` and surrounds its output with a
# <TeXML escape="..."> opening tag (`prefix`) and `postfix`.
# NOTE(review): `f`, `really` and `postfix` are bound on lines that are not
# visible in this excerpt -- presumably by an enclosing decorator; confirm
# against the full file.
37 def _wrap(*args, **kw):
38 value = f(*args, **kw)
# escape="1" when `really` is truthy, escape="0" otherwise.
40 prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
# NOTE(review): a list result drops into the interactive pdb debugger. This
# looks like a leftover debugging aid and would stall any unattended run --
# confirm and replace with an explicit error.
42 if isinstance(value, list):
43 import pdb; pdb.set_trace()
# A tuple result gets the prefix on its first item and the postfix on its
# second item.
44 if isinstance(value, tuple):
45 return prefix + value[0], value[1] + postfix
# Any other result is wrapped on both sides.
47 return prefix + value + postfix
# Factory for element handlers: the inner `wrap` starts its output with a
# TeXML <cmd name="..."> tag for `name`.
# NOTE(review): only part of this function is visible here; how `pass_text`
# gates the <parm> line below, and what `wrap` and `cmd` return, must be
# checked in the full file.
52 def cmd(name, pass_text=False):
53 def wrap(self, element):
54 pre = u'<cmd name="%s">' % name
# The element text is interpolated as-is; a None text would be rendered as
# the literal string "None" -- NOTE(review): confirm callers only use this
# on elements that carry text.
57 pre += "<parm>%s</parm>" % element.text
# Text filter: wraps every run of characters in the range U+0400-U+04FF in
# <alien>...</alien> markup.
# The ur"..." literals are Python 2 syntax.
# NOTE(review): the import of `re` and the function's return statement are
# not visible in this excerpt.
64 def mark_alien_characters(text):
65 text = re.sub(ur"([\u0400-\u04ff]+)", ur"<alien>\1</alien>", text)
69 class EduModule(Xmill):
# Initialise the module: run the parent initialiser with `options`, zero the
# activity and exercise counters, and register three text filters in order
# (entity substitution, alien-character marking, end-of-line swapping).
70 def __init__(self, options=None):
71 super(EduModule, self).__init__(options)
72 self.activity_counter = 0
73 self.exercise_counter = 0
# Local text filter: when the 'strofa' option is truthy, each "/\n" sequence
# is replaced by a <ctrl> element.
# NOTE(review): in the non-raw literal '<ctrl ch="\"/>' the sequence \" is an
# escaped quote, so the emitted text is <ctrl ch=""/>. handle_wiersz in this
# file writes "\\" to emit a backslash; presumably the same was intended
# here -- confirm. The filter's return statement is not visible in this
# excerpt.
75 def swap_endlines(txt):
76 if self.options['strofa']:
77 txt = txt.replace("/\n", '<ctrl ch="\"/>')
79 self.register_text_filter(functions.substitute_entities)
80 self.register_text_filter(mark_alien_characters)
81 self.register_text_filter(swap_endlines)
# Collect the text of every dc:<dc_field> element (namespace DCNS.uri).
# The XPath starts with "//", so it matches anywhere in the document that
# `element` belongs to, not only below `element` itself.
# NOTE(review): how `single` shapes the return value is on lines not visible
# in this excerpt.
83 def get_dc(self, element, dc_field, single=False):
84 values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
89 def handle_rdf__RDF(self, _):
90 "skip metadata in generation"
# Build the <cmd name="rightsinfostr"> TeXML fragment from the rights metadata.
# The 'rights.license' value is emitted as an <opt> only when get_dc returns
# something truthy (the `x and y or ''` idiom); the 'rights' value always
# goes into a <parm>.
# NOTE(review): the tail of the return expression is not visible in this
# excerpt.
94 def get_rightsinfo(self, element):
95 rights_lic = self.get_dc(element, 'rights.license', True)
96 return u'<cmd name="rightsinfostr">' + \
97 (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
98 u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
def get_authors(self, element):
    """Return the creators of the document as one comma-separated string.

    The ``creator.expert``, ``creator.scenario`` and ``creator.textbook``
    metadata fields are looked up through ``get_dc`` in that order, the
    results are concatenated and every value is joined with ``u', '``.
    """
    experts = self.get_dc(element, 'creator.expert')
    scenario_writers = self.get_dc(element, 'creator.scenario')
    textbook_writers = self.get_dc(element, 'creator.textbook')
    everyone = experts + scenario_writers + textbook_writers
    return u', '.join(everyone)
def get_title(self, element):
    """Return what ``get_dc`` yields for the ``title`` field in single mode."""
    title = self.get_dc(element, 'title', True)
    return title
# Root handler: assembles the TeXML document preamble and returns it together
# with the closing u'</TeXML>' tag. The preamble entries are collected in
# `lines`; falsy entries (options that are switched off) are dropped by
# filter(None, ...) before joining.
# Options read in the visible lines: 'customization_str', 'has_cover' and
# 'morefloats' ('new', 'old' or 'none').
# NOTE(review): the '\usepackage...' literals on listing lines 119-121 use a
# single backslash in plain string literals, unlike the doubled backslashes
# elsewhere. A Python 2 byte string keeps "\u" literally, but the same text
# in a unicode literal (or under Python 3) is an invalid \u escape --
# confirm and double the backslashes.
# NOTE(review): several lines of this function (the start of the `lines`
# sequence and the end of the morefloats branch) are not visible in this
# excerpt.
112 def handle_utwor(self, element):
115 <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
117 \\documentclass[%s]{wl}
118 \\usepackage{style}''' % self.options['customization_str'],
119 self.options['has_cover'] and '\usepackage{makecover}',
120 (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
121 (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
122 (self.options['morefloats'] == 'none' and
123 u'''\\IfFileExists{morefloats.sty}{
124 \\usepackage{morefloats}
126 u'''\\def\\authors{%s}''' % self.get_authors(element),
127 u'''\\author{\\authors}''',
128 u'''\\title{%s}''' % self.get_title(element),
129 u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
130 u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
133 return u"".join(filter(None, lines)), u'</TeXML>'
137 def handle_powiesc(self, element):
139 <env name="document">
140 <cmd name="maketitle"/>
def handle_texcommand(self, element):
    """Wrap an element in the TeX command derived from its tag.

    The command name comes from ``functions.texcommand(element.tag)``.
    Returns the opening and closing TeXML fragments as a 2-tuple; the
    element content ends up inside the command's ``<parm>``.
    """
    tex_name = functions.texcommand(element.tag)
    opening = u'<TeXML escape="1"><cmd name="%s"><parm>' % tex_name
    closing = u'</parm></cmd></TeXML>'
    return opening, closing
# Chained assignment that binds many element handlers to one shared value.
# NOTE(review): the right-hand side that ends the chain is not visible in
# this excerpt. handle_akap_dialog, handle_naglowek_podrozdzial and
# handle_naglowek_rozdzial each appear twice in the visible chain; repeating
# a target in a chained assignment is harmless but redundant -- confirm that
# no other handler name was intended.
152 handle_akap_dialog = \
153 handle_akap_dialog = \
154 handle_autor_utworu = \
156 handle_didaskalia = \
157 handle_didask_tekst = \
158 handle_dlugi_cytat = \
159 handle_dzielo_nadrzedne = \
160 handle_lista_osoba = \
162 handle_miejsce_czas = \
164 handle_motto_podpis = \
165 handle_naglowek_akt = \
166 handle_naglowek_czesc = \
167 handle_naglowek_listy = \
168 handle_naglowek_osoba = \
169 handle_naglowek_podrozdzial = \
170 handle_naglowek_podrozdzial = \
171 handle_naglowek_rozdzial = \
172 handle_naglowek_rozdzial = \
173 handle_naglowek_scena = \
174 handle_nazwa_utworu = \
180 handle_poezja_cyt = \
183 handle_sekcja_asterysk = \
184 handle_sekcja_swiatlo = \
185 handle_separator_linia = \
186 handle_slowo_obce = \
188 handle_tytul_dziela = \
189 handle_wyroznienie = \
# Underlying 'strofa' command handler built by cmd() with pass_text=True;
# handle_strofa calls it after assigning the 'strofa' option.
192 _handle_strofa = cmd("strofa", True)
def handle_strofa(self, element):
    """Handle a stanza element.

    Assigns ``{'strofa': True}`` to ``self.options`` and then delegates
    the element to ``self._handle_strofa``, returning its result.
    """
    stanza_flag = {'strofa': True}
    self.options = stanza_flag
    rendered = self._handle_strofa(element)
    return rendered
# Handler for an activity element: bumps self.activity_counter, renders the
# 'opis', 'wskazowki' and 'pomoce' children with a separate EduModule
# instance built from the current options, reads the text of 'forma' and
# 'czas', and fills a TeXML template with the results.
# NOTE(review): element.xpath('opis')[0] raises IndexError when the activity
# has no <opis> child. 'wskazowki' and 'pomoce' are only assigned when the
# child exists; their fallback values, the options passed on line 202 and
# the start and end of the template are on lines not visible in this excerpt.
198 def handle_aktywnosc(self, element):
199 self.activity_counter += 1
202 'activity_counter': self.activity_counter,
205 submill = EduModule(self.options)
207 opis = submill.generate(element.xpath('opis')[0])
209 n = element.xpath('wskazowki')
210 if n: wskazowki = submill.generate(n[0])
213 n = element.xpath('pomoce')
215 if n: pomoce = submill.generate(n[0])
218 forma = ''.join(element.xpath('forma/text()'))
220 czas = ''.join(element.xpath('czas/text()'))
222 counter = self.activity_counter
226 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
227 <cmd name="activityinfo"><parm>
228 <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
229 <cmd name="activityform"><parm>%(forma)s</parm></cmd>
230 <cmd name="activitytools"><parm>%(pomoce)s</parm></cmd>
# Both handlers are a lambda that emits nothing before or after the element
# content, wrapped by ifoption(sub_gen=True).
# NOTE(review): ifoption is imported from xmlutils; presumably it enables the
# handler only when the 'sub_gen' option is set -- confirm there.
239 handle_opis = ifoption(sub_gen=True)(lambda s, e: ('', ''))
240 handle_wskazowki = ifoption(sub_gen=True)(lambda s, e: ('', ''))
@ifoption(sub_gen=True)
def handle_pomoce(self, _):
    """Prefix the element content with the "Pomoce: " label; emit nothing after it."""
    label = "Pomoce: "
    trailer = ""
    return label, trailer
246 def handle_czas(self, *_):
249 def handle_forma(self, *_):
# Handler for list elements: maps the 'typ' attribute (default 'punkt') to a
# LaTeX list environment and returns the opening and closing <env> tags.
# NOTE(review): `attrs={}` is a mutable default argument. It is not modified
# in the visible lines, but a None default would be safer.
252 def handle_lista(self, element, attrs={}):
253 ltype = element.attrib.get('typ', 'punkt')
# Glossary lists: the document named by 'href' is fetched through the
# 'provider' option, parsed, and stored in the options for later lookups.
254 if ltype == 'slowniczek':
255 surl = element.attrib.get('href', None)
258 sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
259 self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
# A 'typ' value that is not a key of this mapping raises KeyError.
261 listcmd = {'num': 'enumerate',
264 'slowniczek': 'itemize',
265 'czytelnia': 'itemize'}[ltype]
267 return u'<env name="%s">' % listcmd, u'</env>'
def handle_punkt(self, element):
    """Start a list item: emit the ``item`` command before the content, nothing after."""
    opening = '<cmd name="item"/>'
    closing = ''
    return opening, closing
# Dispatch an exercise element to the handler class registered for its 'typ'
# attribute: the class is instantiated with the current options and its
# generate() result is returned.
# NOTE(review): the active entries of `exercise_handlers` are not visible in
# this excerpt; only the disabled (commented-out) ones are.
272 def handle_cwiczenie(self, element):
273 exercise_handlers = {
275 # 'uporzadkuj': Uporzadkuj,
278 # 'przyporzadkuj': Przyporzadkuj,
279 # 'prawdafalsz': PrawdaFalsz
# A missing 'typ' attribute raises KeyError here.
281 typ = element.attrib['typ']
# NOTE(review): `typ not in exercise_handlers` is the idiomatic spelling.
# The fallback is a bare string, whereas the other handlers visible in this
# file return a (before, after) pair -- confirm the caller accepts both.
282 if not typ in exercise_handlers:
283 return '(no handler)'
284 handler = exercise_handlers[typ](self.options)
285 return handler.generate(element)
287 # XXX this is copied from pyhtml.py, except for return and
288 # should be refactored for no code duplication
# Render a glossary term in bold (textbf) followed by ": " and its
# definition. When the next sibling is not a <definiens>, the definition is
# looked up by term text in the glossary document stored in the
# 'slowniczek_xml' option and rendered with a separate EduModule.
# NOTE(review): if `nxt` is an lxml element, `not nxt` tests whether it has
# children, not whether it exists; `nxt is None` is the usual existence
# check. The same applies to the truth test on the 'slowniczek_xml' option
# when it holds a parsed element -- confirm both.
# NOTE(review): the term text is spliced into the XPath string literal, so a
# term containing an apostrophe produces an invalid expression.
# NOTE(review): the initial value of `definiens_s` and any guard before
# defloc[0] are on lines not visible in this excerpt.
289 def handle_definiendum(self, element):
290 nxt = element.getnext()
293 # let's pull definiens from another document
294 if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'):
295 sxml = self.options['slowniczek_xml']
296 assert element.text != ''
297 defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
299 definiens = defloc[0].getnext()
300 if definiens.tag == 'definiens':
301 subgen = EduModule(self.options)
302 definiens_s = subgen.generate(definiens)
304 return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
306 def handle_definiens(self, element):
def handle_podpis(self, element):
    """Wrap the element content in a ``figure`` environment."""
    opening = u'<env name="figure">'
    closing = u'</env>'
    return opening, closing
# Handler for tables: walks the 'wiersz' rows to find the largest column
# count (`max_col`), stores it in the options, and wraps the content in a
# tabular environment with `max_col` left-aligned ('l') columns.
# NOTE(review): the option key is spelled 'columnts'; no reader of it is
# visible in this excerpt -- possibly a typo for 'columns', confirm before
# changing. The initialisation of `max_col`, the computation of `ks` and the
# start of the return statement are on lines not visible here.
312 def handle_tabela(self, element):
314 for w in element.xpath("wiersz"):
316 if max_col < len(ks):
318 self.options = {'columnts': max_col}
320 # has_frames = int(element.attrib.get("ramki", "0"))
321 # if has_frames: frames_c = "framed"
322 # else: frames_c = ""
323 # return u"""<table class="%s">""" % frames_c, u"</table>"
325 <cmd name="begin"><parm>tabular</parm><opt>%s</opt></cmd>
326 ''' % ('l' * max_col), \
327 u'''<cmd name="end"><parm>tabular</parm></cmd>'''
def handle_wiersz(self, element):
    """End a table row: nothing before the content, a backslash control sequence after it."""
    row_start = u""
    row_end = u'<ctrl ch="\\"/>'
    return row_start, row_end
# Handler for a table cell: when another sibling follows, an alignment
# separator is emitted after the cell content.
# NOTE(review): the emitted <spec cat="align"> has neither a closing slash
# nor an end tag, unlike the empty elements emitted elsewhere in this file
# (<cmd name="item"/>); this would leave the generated TeXML not well
# formed -- confirm whether <spec cat="align"/> was intended. The return for
# the last cell is on lines not visible in this excerpt.
334 def handle_kol(self, element):
335 if element.getnext() is not None:
336 return u"", u'<spec cat="align">'
# Link elements use the handler that cmd() builds for the 'em' command, with
# pass_text=True.
339 handle_link = cmd('em', True)
342 class Exercise(EduModule):
# Initialise an exercise: the question counter is zeroed before the parent
# initialiser runs with the same positional and keyword arguments.
343 def __init__(self, *args, **kw):
344 self.question_counter = 0
345 super(Exercise, self).__init__(*args, **kw)
# Handler built by cmd() for the 'akap' command with pass_text=True, wrapped
# by ifoption(teacher=True).
# NOTE(review): presumably active only when the 'teacher' option is set --
# confirm in xmlutils.ifoption.
347 handle_rozw_kom = ifoption(teacher=True)(cmd('akap', True))
# Exercise entry point: records the exercise type in the options and resets
# the question and piece counters. `piece_counter` is first assigned here and
# in handle_pytanie, not in __init__.
# When the exercise has no <pytanie> descendant, handle_pytanie is applied to
# the exercise element itself.
# NOTE(review): the construction of the returned value is on lines not
# visible in this excerpt.
349 def handle_cwiczenie(self, element):
350 self.options = {'exercise': element.attrib['typ']}
351 self.question_counter = 0
352 self.piece_counter = 0
356 # Add a single <pytanie> tag if it's not there
357 if not element.xpath(".//pytanie"):
358 qpre, qpost = self.handle_pytanie(element)
# Handler for a question: increments the question counter, resets the piece
# counter, and copies the optional 'rozw', 'uchwyty' and 'min' attributes
# into `opts` under the keys 'solution', 'handles' and 'minimum'.
# NOTE(review): the creation of `opts`, the conditions guarding each
# assignment, the end of the docstring and the return value are on lines not
# visible in this excerpt.
363 def handle_pytanie(self, element):
364 """This will handle <cwiczenie> element, when there is no <pytanie>
367 self.question_counter += 1
368 self.piece_counter = 0
369 solution = element.attrib.get('rozw', None)
371 opts['solution'] = solution
373 handles = element.attrib.get('uchwyty', None)
375 opts['handles'] = handles
377 minimum = element.attrib.get('min', None)
379 opts['minimum'] = minimum
386 class Wybor(Exercise):
# Choice exercise: after the parent handling, decides whether the exercise is
# single-choice and stores the result in the 'single' option.
# It stops being single-choice when a question's 'rozw' attribute splits (on
# commas and spaces) into a number of solutions other than one, or when the
# number of choices carrying a 'nazwa' attribute differs from the number of
# distinct 'nazwa' values.
# NOTE(review): p.attrib['rozw'] raises KeyError for a question without that
# attribute. The loop header, the creation of `uniq` and the return statement
# are on lines not visible in this excerpt.
389 def handle_cwiczenie(self, element):
390 pre, post = super(Wybor, self).handle_cwiczenie(element)
391 is_single_choice = True
392 pytania = element.xpath(".//pytanie")
396 solutions = re.split(r"[, ]+", p.attrib['rozw'])
397 if len(solutions) != 1:
398 is_single_choice = False
400 choices = p.xpath(".//*[@nazwa]")
402 for n in choices: uniq.add(n.attrib['nazwa'])
403 if len(choices) != len(uniq):
404 is_single_choice = False
407 self.options = {'single': is_single_choice}
# List item inside a choice exercise: when the 'exercise' option is set and
# the item has a non-empty 'nazwa' attribute, a 'radio' command (single
# choice) or a 'checkbox' command (multiple choice) is emitted in place of
# the plain item; otherwise the parent handler is used.
# The local name `cmd` shadows the module-level cmd() helper inside this
# method only.
410 def handle_punkt(self, element):
411 if self.options['exercise'] and element.attrib.get('nazwa', None):
412 cmd = 'radio' if self.options['single'] else 'checkbox'
413 return u'<cmd name="%s"/>' % cmd, ''
415 return super(Wybor, self).handle_punkt(element)
421 lists = tree.xpath(".//lista")
426 if p.tail is None: p.tail = ''
430 if p.text is None: p.text = ''
# PDF format variant for education modules.
# NOTE(review): the method that contains the two statements below is not
# visible in this excerpt.
436 class EduModulePDFFormat(PDFFormat):
# Generate TeXML from the document root (after fix_lists) and encode it as
# UTF-8.
439 texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
# NOTE(review): unconditional dump to a fixed path under /tmp, through a file
# object that is never explicitly closed. This looks like a debugging
# leftover -- confirm, then remove it or write it inside a `with` block to a
# configurable location.
441 open("/tmp/texml.xml", "w").write(texml)