X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/a7133c06fd9738c11a2bf60b4fc09365d15da1d7..d329e7e5c88a9445e10083536ae1549e6fc1a1be:/librarian/pypdf.py?ds=inline
diff --git a/librarian/pypdf.py b/librarian/pypdf.py
index cb082ca..0793951 100644
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -9,26 +9,20 @@ Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it by XeLaTeX.
"""
-from __future__ import with_statement
-import os
+from copy import deepcopy
import os.path
import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
import re
-from copy import deepcopy
-from subprocess import call, PIPE
+import random
+from urllib2 import urlopen
-from Texml.processor import process
from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
-from xmlutils import Xmill, tag, tagged, ifoption
+from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from librarian import DCNS, get_resource, IOFile
from librarian import functions
-from pdf import PDFFormat
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
def escape(really):
@@ -48,15 +42,21 @@ def escape(really):
return deco
-def cmd(name, pass_text=False):
- def wrap(self, element):
- pre = u'' % name
+def cmd(name, parms=None):
+ def wrap(self, element=None):
+ pre, post = tag_open_close('cmd', name=name)
- if pass_text:
- pre += "%s" % element.text
- return pre + ''
+ if parms:
+ for parm in parms:
+ e = etree.Element("parm")
+ e.text = parm
+ pre += etree.tostring(e)
+ if element is not None:
+ pre += ""
+ post = "" + post
+ return pre, post
else:
- return pre, ''
+ return pre + post
return wrap
@@ -66,9 +66,16 @@ def mark_alien_characters(text):
class EduModule(Xmill):
- def __init__(self, options=None):
- super(EduModule, self).__init__(options)
+ def __init__(self, options=None, state=None):
+ super(EduModule, self).__init__(options, state)
self.activity_counter = 0
+ self.exercise_counter = 0
+
+ def swap_endlines(txt):
+ if self.options['strofa']:
+ txt = txt.replace("/\n", '')
+ return txt
+ self.register_text_filter(swap_endlines)
self.register_text_filter(functions.substitute_entities)
self.register_text_filter(mark_alien_characters)
@@ -91,11 +98,15 @@ class EduModule(Xmill):
u''
@escape(True)
- def get_authors(self, element):
- authors = self.get_dc(element, 'creator.expert') + \
- self.get_dc(element, 'creator.scenario') + \
- self.get_dc(element, 'creator.textbook')
- return u', '.join(authors)
+ def get_authors(self, element, which=None):
+     """Return author names joined with ', '.
+
+     ``which`` selects one ``authors_<which>`` list on the book info
+     found under ``options['wldoc']``.  When it is None the textbook,
+     scenario and expert lists are concatenated in that order.
+     ``element`` is not used.
+     """
+     book_info = self.options['wldoc'].book_info
+     if which is not None:
+         people = getattr(book_info, "authors_%s" % which)
+     else:
+         people = (book_info.authors_textbook +
+                   book_info.authors_scenario +
+                   book_info.authors_expert)
+     return u', '.join(person.readable() for person in people)
@escape(1)
def get_title(self, element):
@@ -116,32 +127,97 @@ class EduModule(Xmill):
\\usepackage{morefloats}
}{}'''),
u'''\\def\\authors{%s}''' % self.get_authors(element),
+ u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+ u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+ u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+
u'''\\author{\\authors}''',
u'''\\title{%s}''' % self.get_title(element),
- u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
+ u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
u'']
return u"".join(filter(None, lines)), u''
- handle_naglowek_rozdzial = escape(True)(cmd("naglowekrozdzial", True))
- handle_naglowek_podrozdzial = escape(True)(cmd("naglowekpodrozdzial", True))
-
@escape(1)
def handle_powiesc(self, element):
return u"""
- """, """"""
+ """, """"""
+
+ @escape(1)
+ def handle_texcommand(self, element):
+     """Return the opening/closing pair for a tag rendered as a TeX command.
+
+     The command name is ``functions.texcommand(element.tag)``.
+     """
+     # Named ``tex_name`` so the module-level ``cmd`` helper is not shadowed.
+     tex_name = functions.texcommand(element.tag)
+     # NOTE(review): the opening literal is empty as listed, so ``%`` has no
+     # placeholder to fill -- markup may have been lost from this listing;
+     # confirm against the repository.
+     opening = u'' % tex_name
+     closing = u''
+     return opening, closing
+
+ # Every tag listed here is rendered by the generic handle_texcommand
+ # handler.  Each name appears exactly once.
+ handle_akap = \
+     handle_akap_cd = \
+     handle_akap_dialog = \
+     handle_autor_utworu = \
+     handle_dedykacja = \
+     handle_didaskalia = \
+     handle_didask_tekst = \
+     handle_dlugi_cytat = \
+     handle_dzielo_nadrzedne = \
+     handle_lista_osoba = \
+     handle_mat = \
+     handle_miejsce_czas = \
+     handle_motto = \
+     handle_motto_podpis = \
+     handle_naglowek_akt = \
+     handle_naglowek_czesc = \
+     handle_naglowek_listy = \
+     handle_naglowek_osoba = \
+     handle_naglowek_podrozdzial = \
+     handle_naglowek_scena = \
+     handle_nazwa_utworu = \
+     handle_nota = \
+     handle_osoba = \
+     handle_pa = \
+     handle_pe = \
+     handle_podtytul = \
+     handle_poezja_cyt = \
+     handle_pr = \
+     handle_pt = \
+     handle_sekcja_asterysk = \
+     handle_sekcja_swiatlo = \
+     handle_separator_linia = \
+     handle_slowo_obce = \
+     handle_srodtytul = \
+     handle_tytul_dziela = \
+     handle_wyroznienie = \
+     handle_dywiz = \
+     handle_texcommand
+
+ def handle_naglowek_rozdzial(self, element):
+     """Render a chapter heading, toggling ``state['mute']`` first.
+
+     When ``options['teacher']`` is false, a heading whose text starts
+     with one of the listed section names clears ``state['mute']`` and
+     is rendered.  Any other heading (including one with no text) sets
+     ``state['mute']`` and returns None.  When ``options['teacher']`` is
+     true the heading is always rendered via ``handle_texcommand``.
+     """
+     # The third name is written with an escape (U+0142, Polish l-stroke)
+     # so it survives any re-encoding of this file; the previous literal
+     # was mis-decoded and could never match.
+     unmuting_sections = (u'Wiedza', u'Zadania', u'S\u0142owniczek')
+     if not self.options['teacher']:
+         # A heading element without text has ``text`` set to None.
+         heading = element.text or u''
+         muted = not heading.startswith(unmuting_sections)
+         self.state['mute'] = muted
+         if muted:
+             return None
+     return self.handle_texcommand(element)
+ handle_naglowek_rozdzial.unmuter = True
+
+
+ def handle_uwaga(self, _e):
+     """Return None for this element; the argument is ignored."""
+     nothing = None
+     return nothing
+ def handle_extra(self, _e):
+     """Return None for this element; the argument is ignored."""
+     nothing = None
+     return nothing
- handle_autor_utworu = cmd('autorutworu', True)
- handle_nazwa_utworu = cmd('nazwautworu', True)
- handle_dzielo_nadrzedne = cmd('dzielonadrzedne', True)
- handle_podtytul = cmd('podtytul', True)
+ def handle_nbsp(self, _e):
+     """Return the replacement text for an ``nbsp`` element.
+
+     The argument is ignored.
+     """
+     # NOTE(review): the literal is empty as listed; markup may have been
+     # lost from this listing -- confirm against the repository.
+     replacement = ''
+     return replacement
- handle_akap = handle_akap_dialog = handle_akap_cd = lambda s, e: ("\n", "\n")
- handle_strofa = lambda s, e: ("\n","\n")
+ _handle_strofa = cmd("strofa")
+
+ def handle_strofa(self, element):
+     """Set the ``strofa`` option, then delegate to ``_handle_strofa``."""
+     stanza_flag = {'strofa': True}
+     self.options = stanza_flag
+     rendered = self._handle_strofa(element)
+     return rendered
def handle_aktywnosc(self, element):
self.activity_counter += 1
@@ -150,9 +226,12 @@ class EduModule(Xmill):
'activity_counter': self.activity_counter,
'sub_gen': True,
}
- submill = EduModule(self.options)
+ submill = EduModule(self.options, self.state)
- opis = submill.generate(element.xpath('opis')[0])
+ if element.xpath('opis'):
+ opis = submill.generate(element.xpath('opis')[0])
+ else:
+ opis = ''
n = element.xpath('wskazowki')
if n: wskazowki = submill.generate(n[0])
@@ -170,11 +249,16 @@ class EduModule(Xmill):
counter = self.activity_counter
return u"""
-Czas: %(czas)s min
-Forma: %(forma)s
-%(pomoce)s
+
+%(counter)d.
+
+ %(czas)s
+ %(forma)s
+ %(pomoce)s
+
+
-%(counter)d. %(opis)s
+%(opis)s
%(wskazowki)s
""" % locals()
@@ -192,162 +276,358 @@ Forma: %(forma)s
def handle_forma(self, *_):
return
-# def handle_cwiczenie(self, element):
-# exercise_handlers = {
-# 'wybor': Wybor,
-# 'uporzadkuj': Uporzadkuj,
-# 'luki': Luki,
-# 'zastap': Zastap,
-# 'przyporzadkuj': Przyporzadkuj,
-# 'prawdafalsz': PrawdaFalsz
-# }
-
-# typ = element.attrib['typ']
-# handler = exercise_handlers[typ](self.options)
-# return handler.generate(element)
-
-# # Lists
-# def handle_lista(self, element, attrs={}):
-# ltype = element.attrib.get('typ', 'punkt')
-# if ltype == 'slowniczek':
-# surl = element.attrib.get('href', None)
-# sxml = None
-# if surl:
-# sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
-# self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
-# return '
', '
'
-
-# listtag = {'num': 'ol',
-# 'punkt': 'ul',
-# 'alfa': 'ul',
-# 'czytelnia': 'ul'}[ltype]
-
-# classes = attrs.get('class', '')
-# if classes: del attrs['class']
-
-# attrs_s = ' '.join(['%s="%s"' % kv for kv in attrs.items()])
-# if attrs_s: attrs_s = ' ' + attrs_s
-
-# return '<%s class="lista %s %s"%s>' % (listtag, ltype, classes, attrs_s), '%s>' % listtag
-
-# def handle_punkt(self, element):
-# if self.options['slowniczek']:
-# return '', '
'
-# else:
-# return '', ''
-
-# def handle_definiendum(self, element):
-# nxt = element.getnext()
-# definiens_s = ''
-
-# # let's pull definiens from another document
-# if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'):
-# sxml = self.options['slowniczek_xml']
-# assert element.text != ''
-# defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
-# if defloc:
-# definiens = defloc[0].getnext()
-# if definiens.tag == 'definiens':
-# subgen = EduModule(self.options)
-# definiens_s = subgen.generate(definiens)
-
-# return u"", u"" + definiens_s
-
-# def handle_definiens(self, element):
-# return u"", u""
-
-
-# def handle_podpis(self, element):
-# return u"""""", u"
"
-
-# def handle_tabela(self, element):
-# has_frames = int(element.attrib.get("ramki", "0"))
-# if has_frames: frames_c = "framed"
-# else: frames_c = ""
-# return u""""
-
-# def handle_wiersz(self, element):
-# return u"", u"
"
-
-# def handle_kol(self, element):
-# return u"", u" | "
-
-# def handle_rdf__RDF(self, _):
-# # ustal w opcjach rzeczy :D
-# return
-
-# def handle_link(self, element):
-# if 'material' in element.attrib:
-# formats = re.split(r"[, ]+", element.attrib['format'])
-# fmt_links = []
-# for f in formats:
-# fmt_links.append(u'%s' % (self.options['urlmapper'].url_for_material(element.attrib['material'], f), f.upper()))
-
-# return u"", u' (%s)' % u' '.join(fmt_links)
-
-
-# class Exercise(EduModule):
-# def __init__(self, *args, **kw):
-# self.question_counter = 0
-# super(Exercise, self).__init__(*args, **kw)
-
-# def handle_rozw_kom(self, element):
-# return u""""""
-
-# def handle_cwiczenie(self, element):
-# self.options = {'exercise': element.attrib['typ']}
-# self.question_counter = 0
-# self.piece_counter = 0
-
-# pre = u"""
-#
-# """
-# # Add a single tag if it's not there
-# if not element.xpath(".//pytanie"):
-# qpre, qpost = self.handle_pytanie(element)
-# pre = pre + qpre
-# post = qpost + post
-# return pre, post
-
-# def handle_pytanie(self, element):
-# """This will handle element, when there is no
-# """
-# add_class = ""
-# self.question_counter += 1
-# self.piece_counter = 0
-# solution = element.attrib.get('rozw', None)
-# if solution: solution_s = ' data-solution="%s"' % solution
-# else: solution_s = ''
-
-# handles = element.attrib.get('uchwyty', None)
-# if handles:
-# add_class += ' handles handles-%s' % handles
-# self.options = {'handles': handles}
-
-# minimum = element.attrib.get('min', None)
-# if minimum: minimum_s = ' data-minimum="%d"' % int(minimum)
-# else: minimum_s = ''
-
-# return '' %\
-# (add_class, self.question_counter, solution_s + minimum_s), \
-# "
"
+ def handle_lista(self, element, attrs=None):
+     """Open a list environment chosen by the element's ``typ`` attribute.
+
+     A list without any ``punkt`` child yields the placeholder text
+     'W przygotowaniu.' for type 'czytelnia' and None for every other
+     type.  For type 'slowniczek' the glossary document named by ``src``
+     (or a default URL when ``src`` is absent) is fetched through the
+     document provider, parsed, and stored in the options.
+
+     ``attrs`` is accepted for interface compatibility and is not used;
+     it defaults to None rather than a shared mutable dict.  An unknown
+     ``typ`` raises KeyError.
+     """
+     ltype = element.attrib.get('typ', 'punkt')
+     if not element.findall("punkt"):
+         if ltype == 'czytelnia':
+             return 'W przygotowaniu.'
+         return None
+     if ltype == 'slowniczek':
+         surl = element.attrib.get('src')
+         if surl is None:
+             # No src attribute on the list: fall back to the default glossary.
+             surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
+         sxml = None
+         # An explicitly empty src leaves the glossary unset.
+         if surl:
+             sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
+         self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
+
+     listcmd = {'num': 'enumerate',
+                'punkt': 'itemize',
+                'alfa': 'itemize',
+                'slowniczek': 'itemize',
+                'czytelnia': 'itemize'}[ltype]
+
+     # NOTE(review): the opening literal is empty as listed, so ``%`` has no
+     # placeholder -- markup may have been lost from this listing; confirm.
+     return u'' % listcmd, u''
+
+ def handle_punkt(self, element):
+     """Return the opening/closing pair for a ``punkt`` element."""
+     # NOTE(review): both literals are empty as listed; markup may have
+     # been lost from this listing -- confirm against the repository.
+     opening = ''
+     closing = ''
+     return opening, closing
+
+ def handle_cwiczenie(self, element):
+     """Number an exercise and render it with the handler for its ``typ``.
+
+     Returns the text '(no handler)' when ``typ`` is missing or names no
+     known handler; otherwise returns whatever the handler's
+     ``generate(element)`` returns.
+     """
+     exercise_handlers = {
+         'wybor': Wybor,
+         'uporzadkuj': Uporzadkuj,
+         'luki': Luki,
+         'zastap': Zastap,
+         'przyporzadkuj': Przyporzadkuj,
+         'prawdafalsz': PrawdaFalsz
+     }
+
+     # A missing attribute takes the same path as an unknown type instead
+     # of raising KeyError.
+     typ = element.attrib.get('typ')
+     # The counter advances even for unhandled exercises.
+     self.exercise_counter += 1
+     if typ not in exercise_handlers:
+         return '(no handler)'
+     self.options = {'exercise_counter': self.exercise_counter}
+     handler = exercise_handlers[typ](self.options, self.state)
+     return handler.generate(element)
+
+ # XXX this is copied from pyhtml.py, except for return and
+ # should be refactored for no code duplication
+ def handle_definiendum(self, element):
+     """Render a defined term, pulling its definition from the glossary.
+
+     When the term is not directly followed by a ``definiens`` sibling
+     and ``options['slowniczek_xml']`` holds a glossary document, the
+     term's text is looked up among the glossary's ``definiendum``
+     elements.  If the match is followed by a ``definiens``, that
+     element is rendered with a sub-generator and appended after ': '
+     in the closing part of the returned pair.
+     """
+     nxt = element.getnext()
+     definiens_s = ''
+
+     # let's pull definiens from another document
+     sxml = self.options['slowniczek_xml']
+     has_own_definiens = nxt is not None and nxt.tag == 'definiens'
+     if sxml is not None and not has_own_definiens:
+         term = element.text
+         assert term != ''
+         defloc = []
+         if term is not None:
+             # The term goes in as an XPath variable: interpolating it into
+             # the expression breaks as soon as it contains a quote.
+             defloc = sxml.xpath("//definiendum[text()=$term]", term=term)
+         if defloc:
+             definiens = defloc[0].getnext()
+             # The matched definiendum may be the last child of its parent.
+             if definiens is not None and definiens.tag == 'definiens':
+                 subgen = EduModule(self.options, self.state)
+                 definiens_s = subgen.generate(definiens)
+
+     return u'', u": " + definiens_s
+
+ def handle_definiens(self, element):
+     """Return an empty opening/closing pair for a ``definiens`` element."""
+     opening = u""
+     closing = u""
+     return opening, closing
+
+ def handle_podpis(self, element):
+     """Return the opening/closing pair for a ``podpis`` element."""
+     # NOTE(review): both literals are empty as listed; markup may have
+     # been lost from this listing -- confirm against the repository.
+     opening = u""""""
+     closing = u""
+     return opening, closing
+
+ def handle_tabela(self, element):
+     """Open a ``tabular`` with one 'l' column per cell of the widest row.
+
+     The column count is also stored in the options.
+     """
+     # Count the ``kol`` cells of every ``wiersz`` row; no rows means 0.
+     widths = [len(row.xpath("kol")) for row in element.xpath("wiersz")]
+     max_col = max(widths) if widths else 0
+     # NOTE(review): the key is spelled 'columnts' -- confirm that readers
+     # of this option use the same spelling before renaming it.
+     self.options = {'columnts': max_col}
+     # styling:
+     # has_frames = int(element.attrib.get("ramki", "0"))
+     # if has_frames: frames_c = "framed"
+     # else: frames_c = ""
+     # return u""""
+     column_spec = 'l' * max_col
+     return u'''
+tabular%s
+ ''' % column_spec, \
+     u'''tabular'''
+
+ @escape(1)
+ def handle_wiersz(self, element):
+     """Return the opening/closing pair for a ``wiersz`` element."""
+     # NOTE(review): both literals are empty as listed; markup may have
+     # been lost from this listing -- confirm against the repository.
+     opening = u""
+     closing = u''
+     return opening, closing
+
+ @escape(1)
+ def handle_kol(self, element):
+     """Return the opening/closing pair for a ``kol`` element.
+
+     A cell that has a following sibling and the last cell of a row are
+     handled by separate branches.
+     """
+     # NOTE(review): both branches return empty literals as listed; any
+     # column-separator markup may have been lost -- confirm.
+     is_last_cell = element.getnext() is None
+     if is_last_cell:
+         return u"", u""
+     return u"", u''
+
+ def handle_link(self, element):
+     """Render a link as a ``url``, ``href`` or ``emph`` command.
+
+     With no (or an empty) ``url`` attribute the ``emph`` command is
+     used.  When the link text equals the URL the ``url`` command is
+     used.  Otherwise ``href`` is used with the URL as its parameter.
+     """
+     target = element.attrib.get('url')
+     if not target:
+         return cmd('emph')(self, element)
+     if target == element.text:
+         return cmd('url')(self, element)
+     return cmd('href', parms=[target])(self, element)
+
+ def handle_obraz(self, element):
+     """Attach the image named by ``nazwa`` and emit the ``obraz`` command.
+
+     The image is fetched from ``options['format']`` and stored in that
+     object's ``attachments`` under 'obraz/<name without underscores>'.
+     The same path is passed to the command as its parameter.
+     """
+     output_format = self.options['format']
+     # Stripped once here; the name is used for both lookup and path.
+     image_name = element.attrib.get('nazwa', '').strip()
+     picture = output_format.get_image(image_name)
+     attachment_path = "obraz/%s" % image_name.replace("_", "")
+     output_format.attachments[attachment_path] = picture
+     return cmd("obraz", parms=[attachment_path])(self)
+
+ def handle_video(self, element):
+ url = element.attrib.get('url')
+ if not url:
+ print '!!