X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/527b1f9a4cfd256107d1c8fb7fe2f208a5be9409..565aa32ff31bdbe6e2aaf9f47b124ac1f883a990:/librarian/pypdf.py?ds=sidebyside
diff --git a/librarian/pypdf.py b/librarian/pypdf.py
index 2bcd8d0..9851cb1 100644
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -9,30 +9,19 @@ Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it by XeLaTeX.
"""
-from __future__ import with_statement
from copy import deepcopy
-import os
import os.path
import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
import re
import random
-from copy import deepcopy
-from subprocess import call, PIPE
from urllib2 import urlopen
-from Texml.processor import process
from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
-from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from xmlutils import Xmill, ifoption, tag_open_close
+from librarian import DCNS, get_resource, IOFile
from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
def escape(really):
@@ -43,7 +32,8 @@ def escape(really):
prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
postfix = u'</TeXML>'
if isinstance(value, list):
- import pdb; pdb.set_trace()
+ import pdb
+ pdb.set_trace()
if isinstance(value, tuple):
return prefix + value[0], value[1] + postfix
else:
@@ -76,18 +66,19 @@ def mark_alien_characters(text):
class EduModule(Xmill):
- def __init__(self, options=None):
- super(EduModule, self).__init__(options)
+ def __init__(self, options=None, state=None):
+ super(EduModule, self).__init__(options, state)
self.activity_counter = 0
+ self.activity_last = None
self.exercise_counter = 0
def swap_endlines(txt):
if self.options['strofa']:
txt = txt.replace("/\n", '')
return txt
+ self.register_text_filter(swap_endlines)
self.register_text_filter(functions.substitute_entities)
self.register_text_filter(mark_alien_characters)
- self.register_text_filter(swap_endlines)
def get_dc(self, element, dc_field, single=False):
values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
@@ -96,16 +87,15 @@ class EduModule(Xmill):
return values
def handle_rdf__RDF(self, _):
- "skip metadata in generation"
+ """skip metadata in generation"""
return
@escape(True)
def get_rightsinfo(self, element):
rights_lic = self.get_dc(element, 'rights.license', True)
- return u'' + \
- (rights_lic and u'%s' % rights_lic or '') +\
- u'%s' % self.get_dc(element, 'rights', True) +\
- u''
+ return u'' + (rights_lic and u'%s' % rights_lic or '') + \
+ u'%s' % self.get_dc(element, 'rights', True) + \
+ u''
@escape(True)
def get_authors(self, element, which=None):
@@ -116,7 +106,7 @@ class EduModule(Xmill):
dc.authors_expert
else:
authors = getattr(dc, "authors_%s" % which)
- return u', '.join(author.readable() for author in authors)
+ return u', '.join(author.readable() for author in authors if author)
@escape(1)
def get_title(self, element):
@@ -125,31 +115,31 @@ class EduModule(Xmill):
def handle_utwor(self, element):
lines = [
u'''
-
-
- \\documentclass[%s]{wl}
- \\usepackage{style}''' % self.options['customization_str'],
- self.options['has_cover'] and '\usepackage{makecover}',
- (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
- (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
- (self.options['morefloats'] == 'none' and
- u'''\\IfFileExists{morefloats.sty}{
- \\usepackage{morefloats}
- }{}'''),
- u'''\\def\\authors{%s}''' % self.get_authors(element),
- u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
- u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
- u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
-
- u'''\\author{\\authors}''',
- u'''\\title{%s}''' % self.get_title(element),
- u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
- u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
- u'']
+
+
+ \\documentclass[%s]{wl}
+ \\usepackage{style}''' % self.options['customization_str'],
+ self.options['has_cover'] and '\usepackage{makecover}',
+ (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
+ (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
+ (self.options['morefloats'] == 'none' and
+ u'''\\IfFileExists{morefloats.sty}{
+ \\usepackage{morefloats}
+ }{}'''),
+ u'''\\def\\authors{%s}''' % self.get_authors(element),
+ u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+ u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+ u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+
+ u'''\\author{\\authors}''',
+ u'''\\title{%s}''' % self.get_title(element),
+ u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
+ u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
+ u''
+ ]
return u"".join(filter(None, lines)), u''
-
@escape(1)
def handle_powiesc(self, element):
return u"""
@@ -163,54 +153,76 @@ class EduModule(Xmill):
return u'' % cmd, u''
handle_akap = \
- handle_akap = \
- handle_akap_cd = \
- handle_akap_cd = \
- handle_akap_dialog = \
- handle_akap_dialog = \
- handle_autor_utworu = \
- handle_dedykacja = \
- handle_didaskalia = \
- handle_didask_tekst = \
- handle_dlugi_cytat = \
- handle_dzielo_nadrzedne = \
- handle_lista_osoba = \
- handle_mat = \
- handle_miejsce_czas = \
- handle_motto = \
- handle_motto_podpis = \
- handle_naglowek_akt = \
- handle_naglowek_czesc = \
- handle_naglowek_listy = \
- handle_naglowek_osoba = \
- handle_naglowek_podrozdzial = \
- handle_naglowek_podrozdzial = \
- handle_naglowek_rozdzial = \
- handle_naglowek_rozdzial = \
- handle_naglowek_scena = \
- handle_nazwa_utworu = \
- handle_nota = \
- handle_osoba = \
- handle_pa = \
- handle_pe = \
- handle_podtytul = \
- handle_poezja_cyt = \
- handle_pr = \
- handle_pt = \
- handle_sekcja_asterysk = \
- handle_sekcja_swiatlo = \
- handle_separator_linia = \
- handle_slowo_obce = \
- handle_srodtytul = \
- handle_tytul_dziela = \
- handle_wyroznienie = \
- handle_texcommand
+ handle_akap_cd = \
+ handle_akap_dialog = \
+ handle_autor_utworu = \
+ handle_dedykacja = \
+ handle_didaskalia = \
+ handle_didask_tekst = \
+ handle_dlugi_cytat = \
+ handle_dzielo_nadrzedne = \
+ handle_lista_osoba = \
+ handle_mat = \
+ handle_miejsce_czas = \
+ handle_motto = \
+ handle_motto_podpis = \
+ handle_naglowek_akt = \
+ handle_naglowek_czesc = \
+ handle_naglowek_listy = \
+ handle_naglowek_osoba = \
+ handle_naglowek_scena = \
+ handle_nazwa_utworu = \
+ handle_nota = \
+ handle_osoba = \
+ handle_pa = \
+ handle_pe = \
+ handle_podtytul = \
+ handle_poezja_cyt = \
+ handle_pr = \
+ handle_pt = \
+ handle_sekcja_asterysk = \
+ handle_sekcja_swiatlo = \
+ handle_separator_linia = \
+ handle_slowo_obce = \
+ handle_srodtytul = \
+ handle_tytul_dziela = \
+ handle_wyroznienie = \
+ handle_dywiz = \
+ handle_texcommand
+
+ def handle_naglowek_rozdzial(self, element):
+ if not self.options['teacher']:
+ if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
+ self.state['mute'] = False
+ else:
+ self.state['mute'] = True
+ return None
+ return self.handle_texcommand(element)
+ handle_naglowek_rozdzial.unmuter = True
+
+ def handle_naglowek_podrozdzial(self, element):
+ self.activity_counter = 0
+ if not self.options['teacher']:
+ if element.text.startswith(u'Dla ucznia'):
+ self.state['mute'] = False
+ return None
+ elif element.text.startswith(u'Dla nauczyciela'):
+ self.state['mute'] = True
+ return None
+ elif self.state['mute']:
+ return None
+ return self.handle_texcommand(element)
+ handle_naglowek_podrozdzial.unmuter = True
def handle_uwaga(self, _e):
return None
+
def handle_extra(self, _e):
return None
+ def handle_nbsp(self, _e):
+ return ''
+
_handle_strofa = cmd("strofa")
def handle_strofa(self, element):
@@ -224,18 +236,24 @@ class EduModule(Xmill):
'activity_counter': self.activity_counter,
'sub_gen': True,
}
- submill = EduModule(self.options)
+ submill = EduModule(self.options, self.state)
- opis = submill.generate(element.xpath('opis')[0])
+ if element.xpath('opis'):
+ opis = submill.generate(element.xpath('opis')[0])
+ else:
+ opis = ''
n = element.xpath('wskazowki')
- if n: wskazowki = submill.generate(n[0])
-
- else: wskazowki = ''
+ if n:
+ wskazowki = submill.generate(n[0])
+ else:
+ wskazowki = ''
n = element.xpath('pomoce')
- if n: pomoce = submill.generate(n[0])
- else: pomoce = ''
+ if n:
+ pomoce = submill.generate(n[0])
+ else:
+ pomoce = ''
forma = ''.join(element.xpath('forma/text()'))
@@ -243,9 +261,16 @@ class EduModule(Xmill):
counter = self.activity_counter
+ if element.getnext().tag == 'aktywnosc' or (self.activity_last and self.activity_last.getnext() == element):
+ counter_tex = """%(counter)d.""" % locals()
+ else:
+ counter_tex = ''
+
+ self.activity_last = element
+
return u"""
-%(counter)d.
+%(counter_tex)s
%(czas)s
%(forma)s
@@ -271,25 +296,28 @@ class EduModule(Xmill):
def handle_forma(self, *_):
return
- def handle_lista(self, element, attrs={}):
- if not element.findall("punkt"):
- return None
+ def handle_lista(self, element, attrs=None):
ltype = element.attrib.get('typ', 'punkt')
+ if not element.findall("punkt"):
+ if ltype == 'czytelnia':
+ return 'W przygotowaniu.'
+ else:
+ return None
if ltype == 'slowniczek':
surl = element.attrib.get('src', None)
if surl is None:
# print '** missing src on , setting default'
- surl = 'http://edukacjamedialna.edu.pl/slowniczek'
- sxml = None
- if surl:
- sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
- self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
-
- listcmd = {'num': 'enumerate',
- 'punkt': 'itemize',
- 'alfa': 'itemize',
- 'slowniczek': 'itemize',
- 'czytelnia': 'itemize'}[ltype]
+ surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
+ sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
+ self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
+
+ listcmd = {
+ 'num': 'enumerate',
+ 'punkt': 'itemize',
+ 'alfa': 'itemize',
+ 'slowniczek': 'itemize',
+ 'czytelnia': 'itemize'
+ }[ltype]
return u'' % listcmd, u''
@@ -308,10 +336,10 @@ class EduModule(Xmill):
typ = element.attrib['typ']
self.exercise_counter += 1
- if not typ in exercise_handlers:
+ if typ not in exercise_handlers:
return '(no handler)'
self.options = {'exercise_counter': self.exercise_counter}
- handler = exercise_handlers[typ](self.options)
+ handler = exercise_handlers[typ](self.options, self.state)
return handler.generate(element)
# XXX this is copied from pyhtml.py, except for return and
@@ -324,11 +352,14 @@ class EduModule(Xmill):
if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
sxml = self.options['slowniczek_xml']
assert element.text != ''
- defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
+ if "'" in (element.text or ''):
+ defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
+ else:
+ defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
if defloc:
definiens = defloc[0].getnext()
if definiens.tag == 'definiens':
- subgen = EduModule(self.options)
+ subgen = EduModule(self.options, self.state)
definiens_s = subgen.generate(definiens)
return u'', u": " + definiens_s
@@ -347,14 +378,13 @@ class EduModule(Xmill):
max_col = len(ks)
self.options = {'columnts': max_col}
# styling:
- # has_frames = int(element.attrib.get("ramki", "0"))
- # if has_frames: frames_c = "framed"
- # else: frames_c = ""
- # return u""""
+ # has_frames = int(element.attrib.get("ramki", "0"))
+ # if has_frames: frames_c = "framed"
+ # else: frames_c = ""
+ # return u""""
return u'''
tabular%s
- ''' % ('l' * max_col), \
- u'''tabular'''
+ ''' % ('l' * max_col), u'''tabular'''
@escape(1)
def handle_wiersz(self, element):
@@ -378,8 +408,9 @@ class EduModule(Xmill):
def handle_obraz(self, element):
frmt = self.options['format']
- name = element.attrib['nazwa'].strip()
+ name = element.attrib.get('nazwa', '').strip()
image = frmt.get_image(name.strip())
+ name = image.get_filename().rsplit('/', 1)[-1]
img_path = "obraz/%s" % name.replace("_", "")
frmt.attachments[img_path] = image
return cmd("obraz", parms=[img_path])(self)
@@ -394,8 +425,7 @@ class EduModule(Xmill):
print '!! unknown