fnp
/
librarian.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
some obvious fixes
[librarian.git]
/
librarian
/
pypdf.py
diff --git
a/librarian/pypdf.py
b/librarian/pypdf.py
index
6d6e0aa
..
b261eb6
100644
(file)
--- a/
librarian/pypdf.py
+++ b/
librarian/pypdf.py
@@
-9,30
+9,20
@@
Creates one big XML from the book and its children, converts it to LaTeX
with TeXML, then runs it by XeLaTeX.
"""
with TeXML, then runs it by XeLaTeX.
"""
-from __future__ import with_statement
from copy import deepcopy
from copy import deepcopy
-import os
import os.path
import shutil
import os.path
import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
import re
import random
import re
import random
-from copy import deepcopy
-from subprocess import call, PIPE
from urllib2 import urlopen
from urllib2 import urlopen
-from Texml.processor import process
from lxml import etree
from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
from librarian.dcparser import Person
from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from librarian import DCNS, get_resource, IOFile
from librarian import functions
from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
def escape(really):
def escape(really):
@@
-85,9
+75,9
@@
class EduModule(Xmill):
if self.options['strofa']:
txt = txt.replace("/\n", '<ctrl ch="\\"/>')
return txt
if self.options['strofa']:
txt = txt.replace("/\n", '<ctrl ch="\\"/>')
return txt
+ self.register_text_filter(swap_endlines)
self.register_text_filter(functions.substitute_entities)
self.register_text_filter(mark_alien_characters)
self.register_text_filter(functions.substitute_entities)
self.register_text_filter(mark_alien_characters)
- self.register_text_filter(swap_endlines)
def get_dc(self, element, dc_field, single=False):
values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
def get_dc(self, element, dc_field, single=False):
values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
@@
-204,8
+194,17
@@
class EduModule(Xmill):
handle_srodtytul = \
handle_tytul_dziela = \
handle_wyroznienie = \
handle_srodtytul = \
handle_tytul_dziela = \
handle_wyroznienie = \
+ handle_dywiz = \
handle_texcommand
handle_texcommand
+ def handle_uwaga(self, _e):
+ return None
+ def handle_extra(self, _e):
+ return None
+
+ def handle_nbsp(self, _e):
+ return '<spec cat="tilde" />'
+
_handle_strofa = cmd("strofa")
def handle_strofa(self, element):
_handle_strofa = cmd("strofa")
def handle_strofa(self, element):
@@
-221,7
+220,10
@@
class EduModule(Xmill):
}
submill = EduModule(self.options)
}
submill = EduModule(self.options)
- opis = submill.generate(element.xpath('opis')[0])
+ if element.xpath('opis'):
+ opis = submill.generate(element.xpath('opis')[0])
+ else:
+ opis = ''
n = element.xpath('wskazowki')
if n: wskazowki = submill.generate(n[0])
n = element.xpath('wskazowki')
if n: wskazowki = submill.generate(n[0])
@@
-267,14
+269,17
@@
class EduModule(Xmill):
return
def handle_lista(self, element, attrs={}):
return
def handle_lista(self, element, attrs={}):
- if not element.findall("punkt"):
- return None
ltype = element.attrib.get('typ', 'punkt')
ltype = element.attrib.get('typ', 'punkt')
+ if not element.findall("punkt"):
+ if ltype == 'czytelnia':
+ return 'W przygotowaniu.'
+ else:
+ return None
if ltype == 'slowniczek':
surl = element.attrib.get('src', None)
if surl is None:
# print '** missing src on <slowniczek>, setting default'
if ltype == 'slowniczek':
surl = element.attrib.get('src', None)
if surl is None:
# print '** missing src on <slowniczek>, setting default'
- surl = 'http://edukacjamedialna.edu.pl/
slowniczek
'
+ surl = 'http://edukacjamedialna.edu.pl/
lekcje/slowniczek/
'
sxml = None
if surl:
sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
sxml = None
if surl:
sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
@@
-373,7
+378,7
@@
class EduModule(Xmill):
def handle_obraz(self, element):
frmt = self.options['format']
def handle_obraz(self, element):
frmt = self.options['format']
- name = element.attrib
['nazwa']
.strip()
+ name = element.attrib
.get('nazwa', '')
.strip()
image = frmt.get_image(name.strip())
img_path = "obraz/%s" % name.replace("_", "")
frmt.attachments[img_path] = image
image = frmt.get_image(name.strip())
img_path = "obraz/%s" % name.replace("_", "")
frmt.attachments[img_path] = image
@@
-467,13
+472,13
@@
class Wybor(Exercise):
if not pytania:
pytania = [element]
for p in pytania:
if not pytania:
pytania = [element]
for p in pytania:
- solutions = re.split(r"[, ]+", p.attrib
['rozw']
)
+ solutions = re.split(r"[, ]+", p.attrib
.get('rozw', '')
)
if len(solutions) != 1:
is_single_choice = False
break
choices = p.xpath(".//*[@nazwa]")
uniq = set()
if len(solutions) != 1:
is_single_choice = False
break
choices = p.xpath(".//*[@nazwa]")
uniq = set()
- for n in choices: uniq.add(n.attrib
['nazwa']
)
+ for n in choices: uniq.add(n.attrib
.get('nazwa', '')
)
if len(choices) != len(uniq):
is_single_choice = False
break
if len(choices) != len(uniq):
is_single_choice = False
break
@@
-550,7
+555,7
@@
class Zastap(Luki):
return question.xpath(".//zastap")
def solution(self, piece):
return question.xpath(".//zastap")
def solution(self, piece):
- return piece.attrib
['rozw']
+ return piece.attrib
.get('rozw', '')
def list_header(self):
return u"Elementy do wstawienia"
def list_header(self):
return u"Elementy do wstawienia"
@@
-594,6
+599,9
@@
class EduModulePDFFormat(PDFFormat):
style = get_resource('res/styles/edumed/pdf/edumed.sty')
def get_texml(self):
style = get_resource('res/styles/edumed/pdf/edumed.sty')
def get_texml(self):
+ substitute_hyphens(self.wldoc.edoc)
+ fix_hanging(self.wldoc.edoc)
+
self.attachments = {}
edumod = EduModule({
"wldoc": self.wldoc,
self.attachments = {}
edumod = EduModule({
"wldoc": self.wldoc,