X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/527b1f9a4cfd256107d1c8fb7fe2f208a5be9409..565aa32ff31bdbe6e2aaf9f47b124ac1f883a990:/librarian/pypdf.py?ds=sidebyside

diff --git a/librarian/pypdf.py b/librarian/pypdf.py
index 2bcd8d0..9851cb1 100644
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -9,30 +9,19 @@ Creates one big XML from the book and its children, converts it to LaTeX
 with TeXML, then runs it by XeLaTeX.
 
 """
-from __future__ import with_statement
 from copy import deepcopy
-import os
 import os.path
 import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
 import re
 import random
-from copy import deepcopy
-from subprocess import call, PIPE
 from urllib2 import urlopen
 
-from Texml.processor import process
 from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
 
-from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
-from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from xmlutils import Xmill, ifoption, tag_open_close
+from librarian import DCNS, get_resource, IOFile
 from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
 
 
 def escape(really):
@@ -43,7 +32,8 @@ def escape(really):
             prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
             postfix = u'</TeXML>'
             if isinstance(value, list):
-                import pdb; pdb.set_trace()
+                import pdb
+                pdb.set_trace()
             if isinstance(value, tuple):
                 return prefix + value[0], value[1] + postfix
             else:
@@ -76,18 +66,19 @@ def mark_alien_characters(text):
 
 
 class EduModule(Xmill):
-    def __init__(self, options=None):
-        super(EduModule, self).__init__(options)
+    def __init__(self, options=None, state=None):
+        super(EduModule, self).__init__(options, state)
         self.activity_counter = 0
+        self.activity_last = None
         self.exercise_counter = 0
 
         def swap_endlines(txt):
             if self.options['strofa']:
                 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
             return txt
+        self.register_text_filter(swap_endlines)
         self.register_text_filter(functions.substitute_entities)
         self.register_text_filter(mark_alien_characters)
-        self.register_text_filter(swap_endlines)
 
     def get_dc(self, element, dc_field, single=False):
         values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
@@ -96,16 +87,15 @@ class EduModule(Xmill):
         return values
 
     def handle_rdf__RDF(self, _):
-        "skip metadata in generation"
+        """skip metadata in generation"""
         return
 
     @escape(True)
     def get_rightsinfo(self, element):
         rights_lic = self.get_dc(element, 'rights.license', True)
-        return u'<cmd name="rightsinfostr">' + \
-          (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
-          u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
-          u'</cmd>'
+        return u'<cmd name="rightsinfostr">' + (rights_lic and u'<opt>%s</opt>' % rights_lic or '') + \
+            u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) + \
+            u'</cmd>'
 
     @escape(True)
     def get_authors(self, element, which=None):
@@ -116,7 +106,7 @@ class EduModule(Xmill):
                 dc.authors_expert
         else:
             authors = getattr(dc, "authors_%s" % which)
-        return u', '.join(author.readable() for author in authors)
+        return u', '.join(author.readable() for author in authors if author)
 
     @escape(1)
     def get_title(self, element):
@@ -125,31 +115,31 @@ class EduModule(Xmill):
     def handle_utwor(self, element):
         lines = [
             u'''
-    <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
-        <TeXML escape="0">
-        \\documentclass[%s]{wl}
-        \\usepackage{style}''' % self.options['customization_str'],
-    self.options['has_cover'] and '\usepackage{makecover}',
-    (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
-    (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
-    (self.options['morefloats'] == 'none' and
-     u'''\\IfFileExists{morefloats.sty}{
-            \\usepackage{morefloats}
-        }{}'''),
-    u'''\\def\\authors{%s}''' % self.get_authors(element),
-    u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
-    u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
-    u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
-    
-    u'''\\author{\\authors}''',
-    u'''\\title{%s}''' % self.get_title(element),
-    u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
-    u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
-    u'</TeXML>']
+                <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
+                <TeXML escape="0">
+                \\documentclass[%s]{wl}
+                \\usepackage{style}''' % self.options['customization_str'],
+            self.options['has_cover'] and '\usepackage{makecover}',
+            (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
+            (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
+            (self.options['morefloats'] == 'none' and
+                u'''\\IfFileExists{morefloats.sty}{
+                \\usepackage{morefloats}
+                }{}'''),
+            u'''\\def\\authors{%s}''' % self.get_authors(element),
+            u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+            u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+            u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+
+            u'''\\author{\\authors}''',
+            u'''\\title{%s}''' % self.get_title(element),
+            u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
+            u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
+            u'</TeXML>'
+        ]
 
         return u"".join(filter(None, lines)), u'</TeXML>'
 
-
     @escape(1)
     def handle_powiesc(self, element):
         return u"""
@@ -163,54 +153,76 @@ class EduModule(Xmill):
         return u'<TeXML escape="1"><cmd name="%s"><parm>' % cmd, u'</parm></cmd></TeXML>'
 
     handle_akap = \
-    handle_akap = \
-    handle_akap_cd = \
-    handle_akap_cd = \
-    handle_akap_dialog = \
-    handle_akap_dialog = \
-    handle_autor_utworu = \
-    handle_dedykacja = \
-    handle_didaskalia = \
-    handle_didask_tekst = \
-    handle_dlugi_cytat = \
-    handle_dzielo_nadrzedne = \
-    handle_lista_osoba = \
-    handle_mat = \
-    handle_miejsce_czas = \
-    handle_motto = \
-    handle_motto_podpis = \
-    handle_naglowek_akt = \
-    handle_naglowek_czesc = \
-    handle_naglowek_listy = \
-    handle_naglowek_osoba = \
-    handle_naglowek_podrozdzial = \
-    handle_naglowek_podrozdzial = \
-    handle_naglowek_rozdzial = \
-    handle_naglowek_rozdzial = \
-    handle_naglowek_scena = \
-    handle_nazwa_utworu = \
-    handle_nota = \
-    handle_osoba = \
-    handle_pa = \
-    handle_pe = \
-    handle_podtytul = \
-    handle_poezja_cyt = \
-    handle_pr = \
-    handle_pt = \
-    handle_sekcja_asterysk = \
-    handle_sekcja_swiatlo = \
-    handle_separator_linia = \
-    handle_slowo_obce = \
-    handle_srodtytul = \
-    handle_tytul_dziela = \
-    handle_wyroznienie = \
-    handle_texcommand
+        handle_akap_cd = \
+        handle_akap_dialog = \
+        handle_autor_utworu = \
+        handle_dedykacja = \
+        handle_didaskalia = \
+        handle_didask_tekst = \
+        handle_dlugi_cytat = \
+        handle_dzielo_nadrzedne = \
+        handle_lista_osoba = \
+        handle_mat = \
+        handle_miejsce_czas = \
+        handle_motto = \
+        handle_motto_podpis = \
+        handle_naglowek_akt = \
+        handle_naglowek_czesc = \
+        handle_naglowek_listy = \
+        handle_naglowek_osoba = \
+        handle_naglowek_scena = \
+        handle_nazwa_utworu = \
+        handle_nota = \
+        handle_osoba = \
+        handle_pa = \
+        handle_pe = \
+        handle_podtytul = \
+        handle_poezja_cyt = \
+        handle_pr = \
+        handle_pt = \
+        handle_sekcja_asterysk = \
+        handle_sekcja_swiatlo = \
+        handle_separator_linia = \
+        handle_slowo_obce = \
+        handle_srodtytul = \
+        handle_tytul_dziela = \
+        handle_wyroznienie = \
+        handle_dywiz = \
+        handle_texcommand
+
+    def handle_naglowek_rozdzial(self, element):
+        if not self.options['teacher']:
+            if element.text.startswith((u'Wiedza', u'Zadania', u'SÅowniczek', u'Dla ucznia')):
+                self.state['mute'] = False
+            else:
+                self.state['mute'] = True
+                return None
+        return self.handle_texcommand(element)
+    handle_naglowek_rozdzial.unmuter = True
+
+    def handle_naglowek_podrozdzial(self, element):
+        self.activity_counter = 0
+        if not self.options['teacher']:
+            if element.text.startswith(u'Dla ucznia'):
+                self.state['mute'] = False
+                return None
+            elif element.text.startswith(u'Dla nauczyciela'):
+                self.state['mute'] = True
+                return None
+            elif self.state['mute']:
+                return None
+        return self.handle_texcommand(element)
+    handle_naglowek_podrozdzial.unmuter = True
 
     def handle_uwaga(self, _e):
         return None
+
     def handle_extra(self, _e):
         return None
 
+    def handle_nbsp(self, _e):
+        return '<spec cat="tilde" />'
+
     _handle_strofa = cmd("strofa")
 
     def handle_strofa(self, element):
@@ -224,18 +236,24 @@ class EduModule(Xmill):
             'activity_counter': self.activity_counter,
             'sub_gen': True,
         }
-        submill = EduModule(self.options)
+        submill = EduModule(self.options, self.state)
 
-        opis = submill.generate(element.xpath('opis')[0])
+        if element.xpath('opis'):
+            opis = submill.generate(element.xpath('opis')[0])
+        else:
+            opis = ''
 
         n = element.xpath('wskazowki')
-        if n: wskazowki = submill.generate(n[0])
-
-        else: wskazowki = ''
+        if n:
+            wskazowki = submill.generate(n[0])
+        else:
+            wskazowki = ''
         n = element.xpath('pomoce')
 
-        if n: pomoce = submill.generate(n[0])
-        else: pomoce = ''
+        if n:
+            pomoce = submill.generate(n[0])
+        else:
+            pomoce = ''
 
         forma = ''.join(element.xpath('forma/text()'))
 
@@ -243,9 +261,16 @@ class EduModule(Xmill):
 
         counter = self.activity_counter
 
+        if element.getnext().tag == 'aktywnosc' or (self.activity_last and self.activity_last.getnext() == element):
+            counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
+        else:
+            counter_tex = ''
+
+        self.activity_last = element
+
         return u"""
 <cmd name="noindent" />
-<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
+%(counter_tex)s
 <cmd name="activityinfo"><parm>
  <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
  <cmd name="activityform"><parm>%(forma)s</parm></cmd>
@@ -271,25 +296,28 @@ class EduModule(Xmill):
     def handle_forma(self, *_):
         return
 
-    def handle_lista(self, element, attrs={}):
-        if not element.findall("punkt"):
-            return None
+    def handle_lista(self, element, attrs=None):
         ltype = element.attrib.get('typ', 'punkt')
+        if not element.findall("punkt"):
+            if ltype == 'czytelnia':
+                return 'W przygotowaniu.'
+            else:
+                return None
         if ltype == 'slowniczek':
             surl = element.attrib.get('src', None)
             if surl is None:
                 # print '** missing src on <slowniczek>, setting default'
-                surl = 'http://edukacjamedialna.edu.pl/slowniczek'
-            sxml = None
-            if surl:
-                sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
-            self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
-
-        listcmd = {'num': 'enumerate',
-               'punkt': 'itemize',
-               'alfa': 'itemize',
-               'slowniczek': 'itemize',
-               'czytelnia': 'itemize'}[ltype]
+                surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
+            sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
+            self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
+
+        listcmd = {
+            'num': 'enumerate',
+            'punkt': 'itemize',
+            'alfa': 'itemize',
+            'slowniczek': 'itemize',
+            'czytelnia': 'itemize'
+        }[ltype]
 
         return u'<env name="%s">' % listcmd, u'</env>'
 
@@ -308,10 +336,10 @@ class EduModule(Xmill):
 
         typ = element.attrib['typ']
         self.exercise_counter += 1
-        if not typ in exercise_handlers:
+        if typ not in exercise_handlers:
             return '(no handler)'
         self.options = {'exercise_counter': self.exercise_counter}
-        handler = exercise_handlers[typ](self.options)
+        handler = exercise_handlers[typ](self.options, self.state)
         return handler.generate(element)
 
     # XXX this is copied from pyhtml.py, except for return and
@@ -324,11 +352,14 @@ class EduModule(Xmill):
         if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
             sxml = self.options['slowniczek_xml']
             assert element.text != ''
-            defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
+            if "'" in (element.text or ''):
+                defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
+            else:
+                defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
             if defloc:
                 definiens = defloc[0].getnext()
                 if definiens.tag == 'definiens':
-                    subgen = EduModule(self.options)
+                    subgen = EduModule(self.options, self.state)
                     definiens_s = subgen.generate(definiens)
 
         return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
@@ -347,14 +378,13 @@ class EduModule(Xmill):
                 max_col = len(ks)
         self.options = {'columnts': max_col}
         # styling:
-                #        has_frames = int(element.attrib.get("ramki", "0"))
-                #        if has_frames: frames_c = "framed"
-                #        else: frames_c = ""
-                #        return u"""<table class="%s">""" % frames_c, u"</table>"
+        #     has_frames = int(element.attrib.get("ramki", "0"))
+        #     if has_frames: frames_c = "framed"
+        #     else: frames_c = ""
+        #     return u"""<table class="%s">""" % frames_c, u"</table>"
         return u'''
 <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
-    ''' % ('l' * max_col), \
-    u'''<cmd name="end"><parm>tabular</parm></cmd>'''
+    ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
 
     @escape(1)
     def handle_wiersz(self, element):
@@ -378,8 +408,9 @@ class EduModule(Xmill):
 
     def handle_obraz(self, element):
         frmt = self.options['format']
-        name = element.attrib['nazwa'].strip()
+        name = element.attrib.get('nazwa', '').strip()
         image = frmt.get_image(name.strip())
+        name = image.get_filename().rsplit('/', 1)[-1]
         img_path = "obraz/%s" % name.replace("_", "")
         frmt.attachments[img_path] = image
         return cmd("obraz", parms=[img_path])(self)
@@ -394,8 +425,7 @@ class EduModule(Xmill):
             print '!! unknown <video> url scheme:', url
             return
         name = m.group(1)
-        thumb = IOFile.from_string(urlopen
-            ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
+        thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
         img_path = "video/%s.jpg" % name.replace("_", "")
         self.options['format'].attachments[img_path] = thumb
         canon_url = "https://www.youtube.com/watch?v=%s" % name
@@ -406,6 +436,7 @@ class Exercise(EduModule):
     def __init__(self, *args, **kw):
         self.question_counter = 0
         super(Exercise, self).__init__(*args, **kw)
+        self.piece_counter = None
 
     handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
 
@@ -427,7 +458,7 @@ class Exercise(EduModule):
         # Add a single <pytanie> tag if it's not there
         if not element.xpath(".//pytanie"):
             qpre, qpost = self.handle_pytanie(element)
-            pre = pre + qpre
+            pre += qpre
             post = qpost + post
         return pre, post
 
@@ -463,7 +494,6 @@ class Exercise(EduModule):
             return self.solution_header() + etree.tostring(par)
 
 
-
 class Wybor(Exercise):
     def handle_cwiczenie(self, element):
         pre, post = super(Wybor, self).handle_cwiczenie(element)
@@ -472,13 +502,14 @@ class Wybor(Exercise):
         if not pytania:
             pytania = [element]
         for p in pytania:
-            solutions = re.split(r"[, ]+", p.attrib['rozw'])
+            solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
             if len(solutions) != 1:
                 is_single_choice = False
                 break
             choices = p.xpath(".//*[@nazwa]")
             uniq = set()
-            for n in choices: uniq.add(n.attrib['nazwa'])
+            for n in choices:
+                uniq.add(n.attrib.get('nazwa', ''))
             if len(choices) != len(uniq):
                 is_single_choice = False
                 break
@@ -555,7 +586,7 @@ class Zastap(Luki):
         return question.xpath(".//zastap")
 
     def solution(self, piece):
-        return piece.attrib['rozw']
+        return piece.attrib.get('rozw', '')
 
     def list_header(self):
         return u"Elementy do wstawienia"
@@ -578,18 +609,19 @@ class PrawdaFalsz(Exercise):
         return pre, post
 
 
-
 def fix_lists(tree):
     lists = tree.xpath(".//lista")
     for l in lists:
         if l.text:
             p = l.getprevious()
             if p is not None:
-                if p.tail is None: p.tail = ''
+                if p.tail is None:
+                    p.tail = ''
                 p.tail += l.text
             else:
                 p = l.getparent()
-                if p.text is None: p.text = ''
+                if p.text is None:
+                    p.text = ''
                 p.text += l.text
             l.text = ''
     return tree
@@ -599,6 +631,9 @@ class EduModulePDFFormat(PDFFormat):
     style = get_resource('res/styles/edumed/pdf/edumed.sty')
 
     def get_texml(self):
+        substitute_hyphens(self.wldoc.edoc)
+        fix_hanging(self.wldoc.edoc)
+
         self.attachments = {}
         edumod = EduModule({
             "wldoc": self.wldoc,
@@ -619,4 +654,3 @@ class EduModulePDFFormat(PDFFormat):
 
     def get_image(self, name):
         return self.wldoc.source.attachments[name]
-