X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/527b1f9a4cfd256107d1c8fb7fe2f208a5be9409..22290f82670463d15f15f42cf1fe3ead76a6c08e:/librarian/pypdf.py diff --git a/librarian/pypdf.py b/librarian/pypdf.py index 2bcd8d0..bb2881f 100644 --- a/librarian/pypdf.py +++ b/librarian/pypdf.py @@ -9,30 +9,20 @@ Creates one big XML from the book and its children, converts it to LaTeX with TeXML, then runs it by XeLaTeX. """ -from __future__ import with_statement from copy import deepcopy -import os import os.path import shutil -from StringIO import StringIO -from tempfile import mkdtemp, NamedTemporaryFile import re import random -from copy import deepcopy -from subprocess import call, PIPE from urllib2 import urlopen -from Texml.processor import process from lxml import etree -from lxml.etree import XMLSyntaxError, XSLTApplyError from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close from librarian.dcparser import Person -from librarian.parser import WLDocument -from librarian import ParseError, DCNS, get_resource, IOFile, Format +from librarian import DCNS, get_resource, IOFile from librarian import functions -from pdf import PDFFormat - +from pdf import PDFFormat, substitute_hyphens, fix_hanging def escape(really): @@ -76,18 +66,19 @@ def mark_alien_characters(text): class EduModule(Xmill): - def __init__(self, options=None): - super(EduModule, self).__init__(options) + def __init__(self, options=None, state=None): + super(EduModule, self).__init__(options, state) self.activity_counter = 0 + self.activity_last = None self.exercise_counter = 0 def swap_endlines(txt): if self.options['strofa']: txt = txt.replace("/\n", '') return txt + self.register_text_filter(swap_endlines) self.register_text_filter(functions.substitute_entities) self.register_text_filter(mark_alien_characters) - self.register_text_filter(swap_endlines) def get_dc(self, element, dc_field, single=False): values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri})) @@ -116,7 +107,7 @@ class EduModule(Xmill): dc.authors_expert else: authors = getattr(dc, "authors_%s" % which) - return u', '.join(author.readable() for author in authors) + return u', '.join(author.readable() for author in authors if author) @escape(1) def get_title(self, element): @@ -183,10 +174,6 @@ class EduModule(Xmill): handle_naglowek_czesc = \ handle_naglowek_listy = \ handle_naglowek_osoba = \ - handle_naglowek_podrozdzial = \ - handle_naglowek_podrozdzial = \ - handle_naglowek_rozdzial = \ - handle_naglowek_rozdzial = \ handle_naglowek_scena = \ handle_nazwa_utworu = \ handle_nota = \ @@ -204,13 +191,41 @@ class EduModule(Xmill): handle_srodtytul = \ handle_tytul_dziela = \ handle_wyroznienie = \ + handle_dywiz = \ handle_texcommand + def handle_naglowek_rozdzial(self, element): + if not self.options['teacher']: + if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')): + self.state['mute'] = False + else: + self.state['mute'] = True + return None + return self.handle_texcommand(element) + handle_naglowek_rozdzial.unmuter = True + + def handle_naglowek_podrozdzial(self, element): + self.activity_counter = 0 + if not self.options['teacher']: + if element.text.startswith(u'Dla ucznia'): + self.state['mute'] = False + return None + elif element.text.startswith(u'Dla nauczyciela'): + self.state['mute'] = True + return None + elif self.state['mute']: + return None + return self.handle_texcommand(element) + handle_naglowek_podrozdzial.unmuter = True + def handle_uwaga(self, _e): return None def handle_extra(self, _e): return None + def handle_nbsp(self, _e): + return '' + _handle_strofa = cmd("strofa") def handle_strofa(self, element): @@ -224,9 +239,12 @@ class EduModule(Xmill): 'activity_counter': self.activity_counter, 'sub_gen': True, } - submill = EduModule(self.options) + submill = EduModule(self.options, self.state) - opis = submill.generate(element.xpath('opis')[0]) + if element.xpath('opis'): + opis = submill.generate(element.xpath('opis')[0]) + else: + opis = '' n = element.xpath('wskazowki') if n: wskazowki = submill.generate(n[0]) @@ -243,9 +261,16 @@ class EduModule(Xmill): counter = self.activity_counter + if element.getnext().tag == 'aktywnosc' or (self.activity_last and self.activity_last.getnext() == element): + counter_tex = """%(counter)d.""" % locals() + else: + counter_tex = '' + + self.activity_last = element + return u""" -%(counter)d. +%(counter_tex)s %(czas)s %(forma)s @@ -272,17 +297,18 @@ class EduModule(Xmill): return def handle_lista(self, element, attrs={}): - if not element.findall("punkt"): - return None ltype = element.attrib.get('typ', 'punkt') + if not element.findall("punkt"): + if ltype == 'czytelnia': + return 'W przygotowaniu.' + else: + return None if ltype == 'slowniczek': surl = element.attrib.get('src', None) if surl is None: # print '** missing src on , setting default' - surl = 'http://edukacjamedialna.edu.pl/slowniczek' - sxml = None - if surl: - sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string()) + surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/' + sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string()) self.options = {'slowniczek': True, 'slowniczek_xml': sxml } listcmd = {'num': 'enumerate', @@ -311,7 +337,7 @@ class EduModule(Xmill): if not typ in exercise_handlers: return '(no handler)' self.options = {'exercise_counter': self.exercise_counter} - handler = exercise_handlers[typ](self.options) + handler = exercise_handlers[typ](self.options, self.state) return handler.generate(element) # XXX this is copied from pyhtml.py, except for return and @@ -324,11 +350,14 @@ class EduModule(Xmill): if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'): sxml = self.options['slowniczek_xml'] assert element.text != '' - defloc = sxml.xpath("//definiendum[text()='%s']" % element.text) + if "'" in (element.text or ''): + defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip()) + else: + defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip()) if defloc: definiens = defloc[0].getnext() if definiens.tag == 'definiens': - subgen = EduModule(self.options) + subgen = EduModule(self.options, self.state) definiens_s = subgen.generate(definiens) return u'', u": " + definiens_s @@ -378,8 +407,9 @@ class EduModule(Xmill): def handle_obraz(self, element): frmt = self.options['format'] - name = element.attrib['nazwa'].strip() + name = element.attrib.get('nazwa', '').strip() image = frmt.get_image(name.strip()) + name = image.get_filename().rsplit('/', 1)[-1] img_path = "obraz/%s" % name.replace("_", "") frmt.attachments[img_path] = image return cmd("obraz", parms=[img_path])(self) @@ -472,13 +502,13 @@ class Wybor(Exercise): if not pytania: pytania = [element] for p in pytania: - solutions = re.split(r"[, ]+", p.attrib['rozw']) + solutions = re.split(r"[, ]+", p.attrib.get('rozw', '')) if len(solutions) != 1: is_single_choice = False break choices = p.xpath(".//*[@nazwa]") uniq = set() - for n in choices: uniq.add(n.attrib['nazwa']) + for n in choices: uniq.add(n.attrib.get('nazwa', '')) if len(choices) != len(uniq): is_single_choice = False break @@ -555,7 +585,7 @@ class Zastap(Luki): return question.xpath(".//zastap") def solution(self, piece): - return piece.attrib['rozw'] + return piece.attrib.get('rozw', '') def list_header(self): return u"Elementy do wstawienia" @@ -599,6 +629,9 @@ class EduModulePDFFormat(PDFFormat): style = get_resource('res/styles/edumed/pdf/edumed.sty') def get_texml(self): + substitute_hyphens(self.wldoc.edoc) + fix_hanging(self.wldoc.edoc) + self.attachments = {} edumod = EduModule({ "wldoc": self.wldoc,