X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/a7133c06fd9738c11a2bf60b4fc09365d15da1d7..fcaf06749607b02e11e4edc3b8ae8313e1a1ef91:/librarian/pypdf.py diff --git a/librarian/pypdf.py b/librarian/pypdf.py index cb082ca..387f647 100644 --- a/librarian/pypdf.py +++ b/librarian/pypdf.py @@ -10,12 +10,14 @@ with TeXML, then runs it by XeLaTeX. """ from __future__ import with_statement +from copy import deepcopy import os import os.path import shutil from StringIO import StringIO from tempfile import mkdtemp, NamedTemporaryFile import re +import random from copy import deepcopy from subprocess import call, PIPE @@ -23,7 +25,7 @@ from Texml.processor import process from lxml import etree from lxml.etree import XMLSyntaxError, XSLTApplyError -from xmlutils import Xmill, tag, tagged, ifoption +from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close from librarian.dcparser import Person from librarian.parser import WLDocument from librarian import ParseError, DCNS, get_resource, IOFile, Format @@ -31,6 +33,7 @@ from librarian import functions from pdf import PDFFormat + def escape(really): def deco(f): def _wrap(*args, **kw): @@ -48,15 +51,18 @@ def escape(really): return deco -def cmd(name, pass_text=False): +def cmd(name, parms=None): def wrap(self, element): - pre = u'' % name - - if pass_text: - pre += "%s" % element.text - return pre + '' - else: - return pre, '' + pre, post = tag_open_close('cmd', name=name) + + if parms: + for parm in parms: + e = etree.Element("parm") + e.text = parm + pre += etree.tostring(e) + pre += "" + post = "" + post + return pre, post return wrap @@ -69,8 +75,15 @@ class EduModule(Xmill): def __init__(self, options=None): super(EduModule, self).__init__(options) self.activity_counter = 0 + self.exercise_counter = 0 + + def swap_endlines(txt): + if self.options['strofa']: + txt = txt.replace("/\n", '') + return txt self.register_text_filter(functions.substitute_entities) self.register_text_filter(mark_alien_characters) + self.register_text_filter(swap_endlines) def get_dc(self, element, dc_field, single=False): values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri})) @@ -125,9 +138,6 @@ class EduModule(Xmill): return u"".join(filter(None, lines)), u'' - handle_naglowek_rozdzial = escape(True)(cmd("naglowekrozdzial", True)) - handle_naglowek_podrozdzial = escape(True)(cmd("naglowekpodrozdzial", True)) - @escape(1) def handle_powiesc(self, element): return u""" @@ -135,13 +145,60 @@ class EduModule(Xmill): """, """""" - handle_autor_utworu = cmd('autorutworu', True) - handle_nazwa_utworu = cmd('nazwautworu', True) - handle_dzielo_nadrzedne = cmd('dzielonadrzedne', True) - handle_podtytul = cmd('podtytul', True) - - handle_akap = handle_akap_dialog = handle_akap_cd = lambda s, e: ("\n", "\n") - handle_strofa = lambda s, e: ("\n","\n") + @escape(1) + def handle_texcommand(self, element): + cmd = functions.texcommand(element.tag) + return u'' % cmd, u'' + + handle_akap = \ + handle_akap = \ + handle_akap_cd = \ + handle_akap_cd = \ + handle_akap_dialog = \ + handle_akap_dialog = \ + handle_autor_utworu = \ + handle_dedykacja = \ + handle_didaskalia = \ + handle_didask_tekst = \ + handle_dlugi_cytat = \ + handle_dzielo_nadrzedne = \ + handle_lista_osoba = \ + handle_mat = \ + handle_miejsce_czas = \ + handle_motto = \ + handle_motto_podpis = \ + handle_naglowek_akt = \ + handle_naglowek_czesc = \ + handle_naglowek_listy = \ + handle_naglowek_osoba = \ + handle_naglowek_podrozdzial = \ + handle_naglowek_podrozdzial = \ + handle_naglowek_rozdzial = \ + handle_naglowek_rozdzial = \ + handle_naglowek_scena = \ + handle_nazwa_utworu = \ + handle_nota = \ + handle_osoba = \ + handle_pa = \ + handle_pe = \ + handle_podtytul = \ + handle_poezja_cyt = \ + handle_pr = \ + handle_pt = \ + handle_sekcja_asterysk = \ + handle_sekcja_swiatlo = \ + handle_separator_linia = \ + handle_slowo_obce = \ + handle_srodtytul = \ + handle_tytul_dziela = \ + handle_wyroznienie = \ + handle_texcommand + + _handle_strofa = cmd("strofa") + + def handle_strofa(self, element): + self.options = {'strofa': True} + return self._handle_strofa(element) def handle_aktywnosc(self, element): self.activity_counter += 1 @@ -170,11 +227,16 @@ class EduModule(Xmill): counter = self.activity_counter return u""" -Czas: %(czas)s min -Forma: %(forma)s -%(pomoce)s -%(counter)d. %(opis)s +%(counter)d. + + %(czas)s + %(forma)s + %(pomoce)s + + + +%(opis)s %(wskazowki)s """ % locals() @@ -192,162 +254,302 @@ Forma: %(forma)s def handle_forma(self, *_): return -# def handle_cwiczenie(self, element): -# exercise_handlers = { -# 'wybor': Wybor, -# 'uporzadkuj': Uporzadkuj, -# 'luki': Luki, -# 'zastap': Zastap, -# 'przyporzadkuj': Przyporzadkuj, -# 'prawdafalsz': PrawdaFalsz -# } - -# typ = element.attrib['typ'] -# handler = exercise_handlers[typ](self.options) -# return handler.generate(element) - -# # Lists -# def handle_lista(self, element, attrs={}): -# ltype = element.attrib.get('typ', 'punkt') -# if ltype == 'slowniczek': -# surl = element.attrib.get('href', None) -# sxml = None -# if surl: -# sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string()) -# self.options = {'slowniczek': True, 'slowniczek_xml': sxml } -# return '
', '
' - -# listtag = {'num': 'ol', -# 'punkt': 'ul', -# 'alfa': 'ul', -# 'czytelnia': 'ul'}[ltype] - -# classes = attrs.get('class', '') -# if classes: del attrs['class'] - -# attrs_s = ' '.join(['%s="%s"' % kv for kv in attrs.items()]) -# if attrs_s: attrs_s = ' ' + attrs_s - -# return '<%s class="lista %s %s"%s>' % (listtag, ltype, classes, attrs_s), '' % listtag - -# def handle_punkt(self, element): -# if self.options['slowniczek']: -# return '
', '
' -# else: -# return '
  • ', '
  • ' - -# def handle_definiendum(self, element): -# nxt = element.getnext() -# definiens_s = '' - -# # let's pull definiens from another document -# if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'): -# sxml = self.options['slowniczek_xml'] -# assert element.text != '' -# defloc = sxml.xpath("//definiendum[text()='%s']" % element.text) -# if defloc: -# definiens = defloc[0].getnext() -# if definiens.tag == 'definiens': -# subgen = EduModule(self.options) -# definiens_s = subgen.generate(definiens) - -# return u"
    ", u"
    " + definiens_s - -# def handle_definiens(self, element): -# return u"
    ", u"
    " - - -# def handle_podpis(self, element): -# return u"""
    """, u"
    " - -# def handle_tabela(self, element): -# has_frames = int(element.attrib.get("ramki", "0")) -# if has_frames: frames_c = "framed" -# else: frames_c = "" -# return u"""""" % frames_c, u"
    " - -# def handle_wiersz(self, element): -# return u"", u"" - -# def handle_kol(self, element): -# return u"", u"" - -# def handle_rdf__RDF(self, _): -# # ustal w opcjach rzeczy :D -# return - -# def handle_link(self, element): -# if 'material' in element.attrib: -# formats = re.split(r"[, ]+", element.attrib['format']) -# fmt_links = [] -# for f in formats: -# fmt_links.append(u'%s' % (self.options['urlmapper'].url_for_material(element.attrib['material'], f), f.upper())) - -# return u"", u' (%s)' % u' '.join(fmt_links) - - -# class Exercise(EduModule): -# def __init__(self, *args, **kw): -# self.question_counter = 0 -# super(Exercise, self).__init__(*args, **kw) - -# def handle_rozw_kom(self, element): -# return u"""""" - -# def handle_cwiczenie(self, element): -# self.options = {'exercise': element.attrib['typ']} -# self.question_counter = 0 -# self.piece_counter = 0 - -# pre = u""" -#
    -#
    -# """ % element.attrib -# post = u""" -#
    -# -# -# -# -# -#
    -#
    -#
    -# """ -# # Add a single tag if it's not there -# if not element.xpath(".//pytanie"): -# qpre, qpost = self.handle_pytanie(element) -# pre = pre + qpre -# post = qpost + post -# return pre, post - -# def handle_pytanie(self, element): -# """This will handle element, when there is no -# """ -# add_class = "" -# self.question_counter += 1 -# self.piece_counter = 0 -# solution = element.attrib.get('rozw', None) -# if solution: solution_s = ' data-solution="%s"' % solution -# else: solution_s = '' - -# handles = element.attrib.get('uchwyty', None) -# if handles: -# add_class += ' handles handles-%s' % handles -# self.options = {'handles': handles} - -# minimum = element.attrib.get('min', None) -# if minimum: minimum_s = ' data-minimum="%d"' % int(minimum) -# else: minimum_s = '' - -# return '
    ' %\ -# (add_class, self.question_counter, solution_s + minimum_s), \ -# "
    " + def handle_lista(self, element, attrs={}): + if not element.findall("punkt"): + return None + ltype = element.attrib.get('typ', 'punkt') + if ltype == 'slowniczek': + surl = element.attrib.get('href', None) + sxml = None + if surl: + sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string()) + self.options = {'slowniczek': True, 'slowniczek_xml': sxml } + + listcmd = {'num': 'enumerate', + 'punkt': 'itemize', + 'alfa': 'itemize', + 'slowniczek': 'itemize', + 'czytelnia': 'itemize'}[ltype] + + return u'' % listcmd, u'' + + def handle_punkt(self, element): + return '', '' + + def handle_cwiczenie(self, element): + exercise_handlers = { + 'wybor': Wybor, + 'uporzadkuj': Uporzadkuj, + 'luki': Luki, + 'zastap': Zastap, + 'przyporzadkuj': Przyporzadkuj, + 'prawdafalsz': PrawdaFalsz + } + + typ = element.attrib['typ'] + self.exercise_counter += 1 + if not typ in exercise_handlers: + return '(no handler)' + self.options = {'exercise_counter': self.exercise_counter} + handler = exercise_handlers[typ](self.options) + return handler.generate(element) + + # XXX this is copied from pyhtml.py, except for return and + # should be refactored for no code duplication + def handle_definiendum(self, element): + nxt = element.getnext() + definiens_s = '' + + # let's pull definiens from another document + if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'): + sxml = self.options['slowniczek_xml'] + assert element.text != '' + defloc = sxml.xpath("//definiendum[text()='%s']" % element.text) + if defloc: + definiens = defloc[0].getnext() + if definiens.tag == 'definiens': + subgen = EduModule(self.options) + definiens_s = subgen.generate(definiens) + + return u'', u": " + definiens_s + + def handle_definiens(self, element): + return u"", u"" + + def handle_podpis(self, element): + return u"""""", u"" + + def handle_tabela(self, element): + max_col = 0 + for w in element.xpath("wiersz"): + ks = w.xpath("kol") + if max_col < len(ks): + max_col = len(ks) + self.options = {'columnts': max_col} + # styling: + # has_frames = int(element.attrib.get("ramki", "0")) + # if has_frames: frames_c = "framed" + # else: frames_c = "" + # return u"""""" % frames_c, u"
    " + return u''' +tabular%s + ''' % ('l' * max_col), \ + u'''tabular''' + + @escape(1) + def handle_wiersz(self, element): + return u"", u'' + + @escape(1) + def handle_kol(self, element): + if element.getnext() is not None: + return u"", u'' + return u"", u"" + + def handle_link(self, element): + if element.attrib.get('url'): + return cmd('href', parms=[element.attrib['url']])(self, element) + else: + return cmd('em')(self, element) + + +class Exercise(EduModule): + def __init__(self, *args, **kw): + self.question_counter = 0 + super(Exercise, self).__init__(*args, **kw) + + handle_rozw_kom = ifoption(teacher=True)(cmd('akap')) + + def handle_cwiczenie(self, element): + self.options = { + 'exercise': element.attrib['typ'], + 'sub_gen': True, + } + self.question_counter = 0 + self.piece_counter = 0 + + header = etree.Element("parm") + header_cmd = etree.Element("cmd", name="naglowekpodrozdzial") + header_cmd.append(header) + header.text = u"Zadanie %d." % self.options['exercise_counter'] + + pre = etree.tostring(header_cmd, encoding=unicode) + post = u"" + # Add a single tag if it's not there + if not element.xpath(".//pytanie"): + qpre, qpost = self.handle_pytanie(element) + pre = pre + qpre + post = qpost + post + return pre, post + + def handle_pytanie(self, element): + """This will handle element, when there is no + """ + self.question_counter += 1 + self.piece_counter = 0 + pre = post = u"" + if self.options['teacher'] and element.attrib.get('rozw'): + post += u" [rozwiązanie: %s]" % element.attrib.get('rozw') + return pre, post + + def handle_punkt(self, element): + pre, post = super(Exercise, self).handle_punkt(element) + if self.options['teacher'] and element.attrib.get('rozw'): + post += u" [rozwiązanie: %s]" % element.attrib.get('rozw') + return pre, post + + def solution_header(self): + par = etree.Element("cmd", name="par") + parm = etree.Element("parm") + parm.text = u"Rozwiązanie:" + par.append(parm) + return etree.tostring(par) + + def explicit_solution(self): + if self.options['solution']: + par = etree.Element("cmd", name="par") + parm = etree.Element("parm") + parm.text = self.options['solution'] + par.append(parm) + return self.solution_header() + etree.tostring(par) + + + +class Wybor(Exercise): + def handle_cwiczenie(self, element): + pre, post = super(Wybor, self).handle_cwiczenie(element) + is_single_choice = True + pytania = element.xpath(".//pytanie") + if not pytania: + pytania = [element] + for p in pytania: + solutions = re.split(r"[, ]+", p.attrib['rozw']) + if len(solutions) != 1: + is_single_choice = False + break + choices = p.xpath(".//*[@nazwa]") + uniq = set() + for n in choices: uniq.add(n.attrib['nazwa']) + if len(choices) != len(uniq): + is_single_choice = False + break + + self.options = {'single': is_single_choice} + return pre, post + + def handle_punkt(self, element): + if self.options['exercise'] and element.attrib.get('nazwa', None): + cmd = 'radio' if self.options['single'] else 'checkbox' + return u'' % cmd, '' + else: + return super(Wybor, self).handle_punkt(element) + + +class Uporzadkuj(Exercise): + def handle_pytanie(self, element): + order_items = element.xpath(".//punkt/@rozw") + return super(Uporzadkuj, self).handle_pytanie(element) + + +class Przyporzadkuj(Exercise): + def handle_lista(self, lista): + header = etree.Element("parm") + header_cmd = etree.Element("cmd", name="par") + header_cmd.append(header) + if 'nazwa' in lista.attrib: + header.text = u"Kategorie:" + elif 'cel' in lista.attrib: + header.text = u"Elementy do przyporządkowania:" + else: + header.text = u"Lista:" + pre, post = super(Przyporzadkuj, self).handle_lista(lista) + pre = etree.tostring(header_cmd, encoding=unicode) + pre + return pre, post + + +class Luki(Exercise): + def find_pieces(self, question): + return question.xpath(".//luka") + + def solution(self, piece): + piece = deepcopy(piece) + piece.tail = None + sub = EduModule() + return sub.generate(piece) + + def handle_pytanie(self, element): + qpre, qpost = super(Luki, self).handle_pytanie(element) + + luki = self.find_pieces(element) + random.shuffle(luki) + self.words = u"%s" % ( + "".join("%s" % self.solution(luka) for luka in luki) + ) + return qpre, qpost + + def handle_opis(self, element): + return '', self.words + + def handle_luka(self, element): + luka = "_" * 10 + if self.options['teacher']: + piece = deepcopy(element) + piece.tail = None + sub = EduModule() + text = sub.generate(piece) + luka += u" [rozwiązanie: %s]" % text + return luka + + +class Zastap(Luki): + def find_pieces(self, question): + return question.xpath(".//zastap") + + def solution(self, piece): + return piece.attrib['rozw'] + + def list_header(self): + return u"Elementy do wstawienia" + + def handle_zastap(self, element): + piece = deepcopy(element) + piece.tail = None + sub = EduModule() + text = sub.generate(piece) + if self.options['teacher'] and element.attrib.get('rozw'): + text += u" [rozwiązanie: %s]" % element.attrib.get('rozw') + return text + + +class PrawdaFalsz(Exercise): + def handle_punkt(self, element): + pre, post = super(PrawdaFalsz, self).handle_punkt(element) + if 'rozw' in element.attrib: + post += u" [Prawda/Fałsz]" + return pre, post + + + +def fix_lists(tree): + lists = tree.xpath(".//lista") + for l in lists: + if l.text: + p = l.getprevious() + if p is not None: + if p.tail is None: p.tail = '' + p.tail += l.text + else: + p = l.getparent() + if p.text is None: p.text = '' + p.text += l.text + l.text = '' + return tree + class EduModulePDFFormat(PDFFormat): def get_texml(self): - edumod = EduModule() - texml = edumod.generate(self.wldoc.edoc.getroot()).encode('utf-8') + edumod = EduModule({"teacher": self.customization.get('teacher')}) + texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8') open("/tmp/texml.xml", "w").write(texml) return texml