X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/fcaf06749607b02e11e4edc3b8ae8313e1a1ef91..a36b71e91ec260eae71f133f450fe4de80974461:/librarian/pypdf.py
diff --git a/librarian/pypdf.py b/librarian/pypdf.py
index 387f647..9034753 100644
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -9,29 +9,20 @@ Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it by XeLaTeX.
"""
-from __future__ import with_statement
from copy import deepcopy
-import os
import os.path
import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
import re
import random
-from copy import deepcopy
-from subprocess import call, PIPE
+from urllib2 import urlopen
-from Texml.processor import process
from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from librarian import DCNS, get_resource, IOFile
from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
def escape(really):
@@ -52,7 +43,7 @@ def escape(really):
def cmd(name, parms=None):
- def wrap(self, element):
+ def wrap(self, element=None):
pre, post = tag_open_close('cmd', name=name)
if parms:
@@ -60,9 +51,12 @@ def cmd(name, parms=None):
e = etree.Element("parm")
e.text = parm
pre += etree.tostring(e)
- pre += ""
- post = "" + post
- return pre, post
+ if element is not None:
+ pre += ""
+ post = "" + post
+ return pre, post
+ else:
+ return pre + post
return wrap
@@ -72,18 +66,19 @@ def mark_alien_characters(text):
class EduModule(Xmill):
- def __init__(self, options=None):
- super(EduModule, self).__init__(options)
+ def __init__(self, options=None, state=None):
+ super(EduModule, self).__init__(options, state)
self.activity_counter = 0
+ self.activity_last = None
self.exercise_counter = 0
def swap_endlines(txt):
if self.options['strofa']:
txt = txt.replace("/\n", '')
return txt
+ self.register_text_filter(swap_endlines)
self.register_text_filter(functions.substitute_entities)
self.register_text_filter(mark_alien_characters)
- self.register_text_filter(swap_endlines)
def get_dc(self, element, dc_field, single=False):
values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
@@ -104,11 +99,15 @@ class EduModule(Xmill):
u''
@escape(True)
- def get_authors(self, element):
- authors = self.get_dc(element, 'creator.expert') + \
- self.get_dc(element, 'creator.scenario') + \
- self.get_dc(element, 'creator.textbook')
- return u', '.join(authors)
+ def get_authors(self, element, which=None):
+ dc = self.options['wldoc'].book_info
+ if which is None:
+ authors = dc.authors_textbook + \
+ dc.authors_scenario + \
+ dc.authors_expert
+ else:
+ authors = getattr(dc, "authors_%s" % which)
+ return u', '.join(author.readable() for author in authors)
@escape(1)
def get_title(self, element):
@@ -129,9 +128,13 @@ class EduModule(Xmill):
\\usepackage{morefloats}
}{}'''),
u'''\\def\\authors{%s}''' % self.get_authors(element),
+ u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+ u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+ u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+
u'''\\author{\\authors}''',
u'''\\title{%s}''' % self.get_title(element),
- u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
+ u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
u'']
@@ -143,7 +146,7 @@ class EduModule(Xmill):
return u"""
- """, """"""
+ """, """"""
@escape(1)
def handle_texcommand(self, element):
@@ -171,10 +174,6 @@ class EduModule(Xmill):
handle_naglowek_czesc = \
handle_naglowek_listy = \
handle_naglowek_osoba = \
- handle_naglowek_podrozdzial = \
- handle_naglowek_podrozdzial = \
- handle_naglowek_rozdzial = \
- handle_naglowek_rozdzial = \
handle_naglowek_scena = \
handle_nazwa_utworu = \
handle_nota = \
@@ -192,8 +191,31 @@ class EduModule(Xmill):
handle_srodtytul = \
handle_tytul_dziela = \
handle_wyroznienie = \
+ handle_dywiz = \
handle_texcommand
+ def handle_naglowek_rozdzial(self, element):
+ if not self.options['teacher']:
+ if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek')):
+ self.state['mute'] = False
+ else:
+ self.state['mute'] = True
+ return None
+ return self.handle_texcommand(element)
+ handle_naglowek_rozdzial.unmuter = True
+
+ def handle_naglowek_podrozdzial(self, element):
+ self.activity_counter = 0
+ return self.handle_texcommand(element)
+
+ def handle_uwaga(self, _e):
+ return None
+ def handle_extra(self, _e):
+ return None
+
+ def handle_nbsp(self, _e):
+ return ''
+
_handle_strofa = cmd("strofa")
def handle_strofa(self, element):
@@ -207,9 +229,12 @@ class EduModule(Xmill):
'activity_counter': self.activity_counter,
'sub_gen': True,
}
- submill = EduModule(self.options)
+ submill = EduModule(self.options, self.state)
- opis = submill.generate(element.xpath('opis')[0])
+ if element.xpath('opis'):
+ opis = submill.generate(element.xpath('opis')[0])
+ else:
+ opis = ''
n = element.xpath('wskazowki')
if n: wskazowki = submill.generate(n[0])
@@ -226,9 +251,16 @@ class EduModule(Xmill):
counter = self.activity_counter
- return u"""
+ if element.getnext().tag == 'aktywnosc' or self.activity_last.getnext() == element:
+ counter_tex = """%(counter)d.""" % locals()
+ else:
+ counter_tex = ''
+
+ self.activity_last = element
-%(counter)d.
+ return u"""
+
+%(counter_tex)s
%(czas)s
%(forma)s
@@ -255,14 +287,20 @@ class EduModule(Xmill):
return
def handle_lista(self, element, attrs={}):
- if not element.findall("punkt"):
- return None
ltype = element.attrib.get('typ', 'punkt')
+ if not element.findall("punkt"):
+ if ltype == 'czytelnia':
+ return 'W przygotowaniu.'
+ else:
+ return None
if ltype == 'slowniczek':
- surl = element.attrib.get('href', None)
+ surl = element.attrib.get('src', None)
+ if surl is None:
+ # print '** missing src on , setting default'
+ surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
sxml = None
if surl:
- sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
+ sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
listcmd = {'num': 'enumerate',
@@ -291,7 +329,7 @@ class EduModule(Xmill):
if not typ in exercise_handlers:
return '(no handler)'
self.options = {'exercise_counter': self.exercise_counter}
- handler = exercise_handlers[typ](self.options)
+ handler = exercise_handlers[typ](self.options, self.state)
return handler.generate(element)
# XXX this is copied from pyhtml.py, except for return and
@@ -301,14 +339,14 @@ class EduModule(Xmill):
definiens_s = ''
# let's pull definiens from another document
- if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'):
+ if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
sxml = self.options['slowniczek_xml']
assert element.text != ''
defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
if defloc:
definiens = defloc[0].getnext()
if definiens.tag == 'definiens':
- subgen = EduModule(self.options)
+ subgen = EduModule(self.options, self.state)
definiens_s = subgen.generate(definiens)
return u'', u": " + definiens_s
@@ -348,9 +386,38 @@ class EduModule(Xmill):
def handle_link(self, element):
if element.attrib.get('url'):
- return cmd('href', parms=[element.attrib['url']])(self, element)
+ url = element.attrib.get('url')
+ if url == element.text:
+ return cmd('url')(self, element)
+ else:
+ return cmd('href', parms=[element.attrib['url']])(self, element)
else:
- return cmd('em')(self, element)
+ return cmd('emph')(self, element)
+
+ def handle_obraz(self, element):
+ frmt = self.options['format']
+ name = element.attrib.get('nazwa', '').strip()
+ image = frmt.get_image(name.strip())
+ img_path = "obraz/%s" % name.replace("_", "")
+ frmt.attachments[img_path] = image
+ return cmd("obraz", parms=[img_path])(self)
+
+ def handle_video(self, element):
+ url = element.attrib.get('url')
+ if not url:
+ print '!!