X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/fcaf06749607b02e11e4edc3b8ae8313e1a1ef91..527b1f9a4cfd256107d1c8fb7fe2f208a5be9409:/librarian/pypdf.py diff --git a/librarian/pypdf.py b/librarian/pypdf.py index 387f647..2bcd8d0 100644 --- a/librarian/pypdf.py +++ b/librarian/pypdf.py @@ -20,6 +20,7 @@ import re import random from copy import deepcopy from subprocess import call, PIPE +from urllib2 import urlopen from Texml.processor import process from lxml import etree @@ -52,7 +53,7 @@ def escape(really): def cmd(name, parms=None): - def wrap(self, element): + def wrap(self, element=None): pre, post = tag_open_close('cmd', name=name) if parms: @@ -60,9 +61,12 @@ def cmd(name, parms=None): e = etree.Element("parm") e.text = parm pre += etree.tostring(e) - pre += "" - post = "" + post - return pre, post + if element is not None: + pre += "" + post = "" + post + return pre, post + else: + return pre + post return wrap @@ -104,11 +108,15 @@ class EduModule(Xmill): u'' @escape(True) - def get_authors(self, element): - authors = self.get_dc(element, 'creator.expert') + \ - self.get_dc(element, 'creator.scenario') + \ - self.get_dc(element, 'creator.textbook') - return u', '.join(authors) + def get_authors(self, element, which=None): + dc = self.options['wldoc'].book_info + if which is None: + authors = dc.authors_textbook + \ + dc.authors_scenario + \ + dc.authors_expert + else: + authors = getattr(dc, "authors_%s" % which) + return u', '.join(author.readable() for author in authors) @escape(1) def get_title(self, element): @@ -129,9 +137,13 @@ class EduModule(Xmill): \\usepackage{morefloats} }{}'''), u'''\\def\\authors{%s}''' % self.get_authors(element), + u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'), + u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'), + u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'), + u'''\\author{\\authors}''', u'''\\title{%s}''' % self.get_title(element), - u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True), + u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(), u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element), u''] @@ -143,7 +155,7 @@ class EduModule(Xmill): return u""" - """, """""" + """, """""" @escape(1) def handle_texcommand(self, element): @@ -194,6 +206,11 @@ class EduModule(Xmill): handle_wyroznienie = \ handle_texcommand + def handle_uwaga(self, _e): + return None + def handle_extra(self, _e): + return None + _handle_strofa = cmd("strofa") def handle_strofa(self, element): @@ -227,7 +244,7 @@ class EduModule(Xmill): counter = self.activity_counter return u""" - + %(counter)d. %(czas)s @@ -259,10 +276,13 @@ class EduModule(Xmill): return None ltype = element.attrib.get('typ', 'punkt') if ltype == 'slowniczek': - surl = element.attrib.get('href', None) + surl = element.attrib.get('src', None) + if surl is None: + # print '** missing src on , setting default' + surl = 'http://edukacjamedialna.edu.pl/slowniczek' sxml = None if surl: - sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string()) + sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string()) self.options = {'slowniczek': True, 'slowniczek_xml': sxml } listcmd = {'num': 'enumerate', @@ -301,7 +321,7 @@ class EduModule(Xmill): definiens_s = '' # let's pull definiens from another document - if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'): + if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'): sxml = self.options['slowniczek_xml'] assert element.text != '' defloc = sxml.xpath("//definiendum[text()='%s']" % element.text) @@ -348,9 +368,38 @@ class EduModule(Xmill): def handle_link(self, element): if element.attrib.get('url'): - return cmd('href', parms=[element.attrib['url']])(self, element) + url = element.attrib.get('url') + if url == element.text: + return cmd('url')(self, element) + else: + return cmd('href', parms=[element.attrib['url']])(self, element) else: - return cmd('em')(self, element) + return cmd('emph')(self, element) + + def handle_obraz(self, element): + frmt = self.options['format'] + name = element.attrib['nazwa'].strip() + image = frmt.get_image(name.strip()) + img_path = "obraz/%s" % name.replace("_", "") + frmt.attachments[img_path] = image + return cmd("obraz", parms=[img_path])(self) + + def handle_video(self, element): + url = element.attrib.get('url') + if not url: + print '!!