X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/dae85cc198c36d58dd2bdd6eaf29f69a5e3cacaa..565aa32ff31bdbe6e2aaf9f47b124ac1f883a990:/librarian/pyhtml.py

diff --git a/librarian/pyhtml.py b/librarian/pyhtml.py
index de04fc0..163d11c 100644
--- a/librarian/pyhtml.py
+++ b/librarian/pyhtml.py
@@ -4,17 +4,21 @@
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
 from lxml import etree
-from librarian import IOFile, RDFNS, DCNS, Format
+from librarian import IOFile, Format
 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
 from librarian import functions
 import re
 import random
+from copy import deepcopy
+
+IMAGE_THUMB_WIDTH = 300
 
 
 class EduModule(Xmill):
     def __init__(self, options=None):
         super(EduModule, self).__init__(options)
         self.activity_counter = 0
+        self.activity_last = None
         self.exercise_counter = 0
 
         # text filters
@@ -23,7 +27,7 @@ class EduModule(Xmill):
                 txt = txt.replace("/\n", "<br/>\n")
             return txt
         self.register_text_filter(functions.substitute_entities)
-        self.register_text_filter(swap_endlines)
+        self.register_escaped_text_filter(swap_endlines)
 
     @tagged('div', 'stanza')
     def handle_strofa(self, element):
@@ -45,7 +49,7 @@ class EduModule(Xmill):
     handle_podtytul = tag("span", "subtitle")
     handle_naglowek_akt = handle_naglowek_czesc = handle_srodtytul = tag("h2")
     handle_naglowek_scena = tag('h2')
-    handle_naglowek_osoba = handle_naglowek_podrozdzial = tag('h3')
+    handle_naglowek_osoba = tag('h3')
     handle_akap = handle_akap_dialog = handle_akap_cd = tag('p', 'paragraph')
 
     handle_wyroznienie = tag('em')
@@ -53,7 +57,7 @@ class EduModule(Xmill):
     handle_slowo_obce = tag('em', 'foreign')
 
     def naglowek_to_anchor(self, naglowek):
-        return re.sub(r" +", " ", naglowek.text.strip())
+        return self.options['urlmapper'].naglowek_to_anchor(naglowek)
 
     def handle_nazwa_utworu(self, element):
         toc = []
@@ -65,11 +69,19 @@ class EduModule(Xmill):
             toc.append("<li>%s</li>" % atxt)
         toc = "<ul class='toc'>%s</ul>" % "".join(toc)
         add_header = "Lekcja: " if self.options['wldoc'].book_info.type in ('course', 'synthetic') else ''
-        return "<h1 class='title'>%s" % add_header, "</h1>" + toc
+        return "<h1 class='title' id='top'>%s" % add_header, "</h1>" + toc
 
-    @tagged("h2")
     def handle_naglowek_rozdzial(self, element):
-        return "", "".join(tag_open_close("a", name=self.naglowek_to_anchor(element)))
+        return_to_top = u"<a href='#top' class='top-link'>wrÃ³Ä do spisu treÅci</a>"
+        pre, post = tag_open_close("h2", id=self.naglowek_to_anchor(element))
+        url = self.options['urlmapper'].get_help_url(element)
+        if url:
+            post = " <a class='help' href='%s'>?</a>" % (url,) + post
+        return return_to_top + pre, post
+
+    def handle_naglowek_podrozdzial(self, element):
+        self.activity_counter = 0
+        return tag('h3')(self, element)
 
     def handle_uwaga(self, _e):
         return None
@@ -82,38 +94,66 @@ class EduModule(Xmill):
             }
         submill = EduModule(dict(self.options.items() + {'sub_gen': True}.items()))
 
-        opis = submill.generate(element.xpath('opis')[0])
+        if element.xpath('opis'):
+            opis = submill.generate(element.xpath('opis')[0])
+        else:
+            opis = ''
 
         n = element.xpath('wskazowki')
-        if n: wskazowki = submill.generate(n[0])
-
-        else: wskazowki = ''
+        if n:
+            wskazowki = submill.generate(n[0])
+        else:
+            wskazowki = ''
         n = element.xpath('pomoce')
 
-        if n: pomoce = submill.generate(n[0])
-        else: pomoce = ''
+        if n:
+            pomoce = submill.generate(n[0])
+        else:
+            pomoce = ''
 
         forma = ''.join(element.xpath('forma/text()'))
+        get_forma_url = self.options['urlmapper'].get_forma_url
+        forms = []
+        for form_name in forma.split(','):
+            name = form_name.strip()
+            url = get_forma_url(name)
+            if url:
+                forms.append("<a href='%s'>%s</a>" % (url, name))
+            else:
+                forms.append(name)
+        forma = ', '.join(forms)
+        if forma:
+            forma = '<section class="infobox kind"><h1>Metoda</h1><p>%s</p></section>' % forma
 
         czas = ''.join(element.xpath('czas/text()'))
+        if czas:
+            czas = '<section class="infobox time"><h1>Czas</h1><p>%s min</p></section>' % czas
 
         counter = self.activity_counter
 
-        return u"""
+        if element.getnext().tag == 'aktywnosc' or (self.activity_last and self.activity_last.getnext() == element):
+            counter_html = """<span class="act_counter">%(counter)d.</span>""" % {'counter': counter}
+        else:
+            counter_html = ''
+
+        self.activity_last = element
+
+        return (
+            u"""
 <div class="activity">
- <div class="text">
-  <span class="act_counter">%(counter)d.</span>
-  %(opis)s""" % locals(), \
-u"""%(wskazowki)s
- </div>
- <aside class="info">
-  <section class="infobox time"><h1>Czas</h1><p>%(czas)s min</p></section>
-  <section class="infobox kind"><h1>Metoda</h1><p>%(forma)s</p></section>
-  %(pomoce)s
- </aside>
- <div class="clearboth"></div>
+  <div class="text">
+    %(counter_html)s
+    %(opis)s""" % {'counter_html': counter_html, 'opis': opis},
+            u"""%(wskazowki)s
+  </div>
+  <aside class="info">
+    %(czas)s
+    %(forma)s
+    %(pomoce)s
+  </aside>
+  <div class="clearboth"></div>
 </div>
-""" % locals()
+""" % {'wskazowki': wskazowki, 'czas': czas, 'forma': forma, 'pomoce': pomoce})
 
     handle_opis = ifoption(sub_gen=True)(tag('div', 'description'))
     handle_wskazowki = ifoption(sub_gen=True)(tag('div', ('hints', 'teacher')))
@@ -146,29 +186,41 @@ u"""%(wskazowki)s
         return handler.generate(element)
 
     # Lists
-    def handle_lista(self, element, attrs={}):
+    def handle_lista(self, element, attrs=None):
+        if attrs is None:
+            attrs = {}
         ltype = element.attrib.get('typ', 'punkt')
+        if not element.findall("punkt"):
+            if ltype == 'czytelnia':
+                return '<p>W przygotowaniu.</p>'
+            else:
+                return None
         if ltype == 'slowniczek':
             surl = element.attrib.get('src', None)
             if surl is None:
                 # print '** missing src on <slowniczek>, setting default'
-                surl = 'http://edukacjamedialna.edu.pl/slowniczek'
-            sxml = None
-            if surl:
-                sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
-            self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
-            return '<div class="slowniczek">', '</div>'
-
-        listtag = {'num': 'ol',
-               'punkt': 'ul',
-               'alfa': 'ul',
-               'czytelnia': 'ul'}[ltype]
+                surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
+            sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
+
+            self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
+            pre, post = '<div class="slowniczek">', '</div>'
+            if not self.options['wldoc'].book_info.url.slug.startswith('slowniczek'):
+                post += u'<p class="see-more"><a href="%s">Zobacz caÅy sÅowniczek.</a></p>' % surl
+            return pre, post
+
+        listtag = {
+            'num': 'ol',
+            'punkt': 'ul',
+            'alfa': 'ul',
+            'czytelnia': 'ul'}[ltype]
 
         classes = attrs.get('class', '')
-        if classes: del attrs['class']
+        if classes:
+            del attrs['class']
 
         attrs_s = ' '.join(['%s="%s"' % kv for kv in attrs.items()])
-        if attrs_s: attrs_s = ' ' + attrs_s
+        if attrs_s:
+            attrs_s = ' ' + attrs_s
 
         return '<%s class="lista %s %s"%s>' % (listtag, ltype, classes, attrs_s), '</%s>' % listtag
 
@@ -189,16 +241,19 @@ u"""%(wskazowki)s
         # let's pull definiens from another document
         if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
             sxml = self.options['slowniczek_xml']
-            defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
+            if "'" in (element.text or ''):
+                defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
+            else:
+                defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
             if defloc:
                 definiens = defloc[0].getnext()
                 if definiens.tag == 'definiens':
                     subgen = EduModule(self.options)
                     definiens_s = subgen.generate(definiens)
             else:
-                print '!! Missing definiendum in source:', element.text
+                print ("!! Missing definiendum in source: '%s'" % element.text).encode('utf-8')
 
-        return u"<dt>", u"</dt>" + definiens_s
+        return u"<dt id='%s'>" % self.naglowek_to_anchor(element), u"</dt>" + definiens_s
 
     def handle_definiens(self, element):
         return u"<dd>", u"</dd>"
@@ -208,8 +263,7 @@ u"""%(wskazowki)s
 
     def handle_tabela(self, element):
         has_frames = int(element.attrib.get("ramki", "0"))
-        if has_frames: frames_c = "framed"
-        else: frames_c = ""
+        frames_c = "framed" if has_frames else ""
         return u"""<table class="%s">""" % frames_c, u"</table>"
 
     def handle_wiersz(self, element):
@@ -228,14 +282,9 @@ u"""%(wskazowki)s
         elif 'material' in element.attrib:
             material_err = u' [BRAKUJÄCY MATERIAÅ]'
             slug = element.attrib['material']
-            make_url = lambda f: self.options['urlmapper'] \
-              .url_for_material(slug, f)
 
-            if 'format' in element.attrib:
-                formats = re.split(r"[, ]+",
-                               element.attrib['format'])
-            else:
-                formats = [None]
+            def make_url(f):
+                return self.options['urlmapper'].url_for_material(slug, f)
 
             formats = self.options['urlmapper'].materials(slug)
 
@@ -255,12 +304,30 @@ u"""%(wskazowki)s
 
             return u"<a href='%s'>" % def_href, u'%s</a>%s' % (def_err, more_links)
 
+    def handle_obraz(self, element):
+        name = element.attrib.get('nazwa', '').strip()
+        if not name:
+            print '!! <obraz> missing "nazwa"'
+            return
+        alt = element.attrib.get('alt', '')
+        if not alt:
+            print '** <obraz> missing "alt"'
+        slug, ext = name.rsplit('.', 1)
+        url = self.options['urlmapper'].url_for_image(slug, ext)
+        thumb_url = self.options['urlmapper'].url_for_image(slug, ext, IMAGE_THUMB_WIDTH)
+        e = etree.Element("a", attrib={"href": url, "class": "image"})
+        e.append(etree.Element("img", attrib={
+            "src": thumb_url,
+            "alt": alt,
+            "width": str(IMAGE_THUMB_WIDTH)}))
+        return etree.tostring(e, encoding=unicode), u""
+
     def handle_video(self, element):
         url = element.attrib.get('url')
         if not url:
             print '!! <video> missing url'
             return
-        m = re.match(r'https?://(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
+        m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
         if not m:
             print '!! unknown <video> url scheme:', url
             return
@@ -270,10 +337,12 @@ u"""%(wskazowki)s
 
 class Exercise(EduModule):
     INSTRUCTION = ""
+
     def __init__(self, *args, **kw):
         self.question_counter = 0
         super(Exercise, self).__init__(*args, **kw)
         self.instruction_printed = False
+        self.piece_counter = None
 
     @tagged('div', 'description')
     def handle_opis(self, element):
@@ -314,7 +383,7 @@ class Exercise(EduModule):
         # Add a single <pytanie> tag if it's not there
         if not element.xpath(".//pytanie"):
             qpre, qpost = self.handle_pytanie(element)
-            pre = pre + qpre
+            pre += qpre
             post = qpost + post
         return pre, post
 
@@ -325,8 +394,7 @@ class Exercise(EduModule):
         self.question_counter += 1
         self.piece_counter = 0
         solution = element.attrib.get('rozw', None)
-        if solution: solution_s = ' data-solution="%s"' % solution
-        else: solution_s = ''
+        solution_s = ' data-solution="%s"' % solution if solution else ''
 
         handles = element.attrib.get('uchwyty', None)
         if handles:
@@ -334,8 +402,7 @@ class Exercise(EduModule):
             self.options = {'handles': handles}
 
         minimum = element.attrib.get('min', None)
-        if minimum: minimum_s = ' data-minimum="%d"' % int(minimum)
-        else: minimum_s = ''
+        minimum_s = ' data-minimum="%d"' % int(minimum) if minimum else ''
 
         return '<div class="question%s" data-no="%d" %s>' %\
             (add_class, self.question_counter, solution_s + minimum_s), \
@@ -344,14 +411,15 @@ class Exercise(EduModule):
     def get_instruction(self):
         if not self.instruction_printed:
             self.instruction_printed = True
-            return u'<span class="instruction">%s</span>' % self.INSTRUCTION
+            if self.INSTRUCTION:
+                return u'<span class="instruction">%s</span>' % self.INSTRUCTION
+            else:
+                return ""
         else:
             return ""
 
 
-
 class Wybor(Exercise):
-    INSTRUCTION = None
     def handle_cwiczenie(self, element):
         pre, post = super(Wybor, self).handle_cwiczenie(element)
         is_single_choice = True
@@ -359,13 +427,14 @@ class Wybor(Exercise):
         if not pytania:
             pytania = [element]
         for p in pytania:
-            solutions = re.split(r"[, ]+", p.attrib['rozw'])
+            solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
             if len(solutions) != 1:
                 is_single_choice = False
                 break
             choices = p.xpath(".//*[@nazwa]")
             uniq = set()
-            for n in choices: uniq.add(n.attrib['nazwa'])
+            for n in choices:
+                uniq.add(n.attrib.get('nazwa', ''))
             if len(choices) != len(uniq):
                 is_single_choice = False
                 break
@@ -413,23 +482,22 @@ Overrides the returned content default handle_pytanie
             u"""</div>"""
 
     def handle_punkt(self, element):
-        return """<li class="question-piece" data-pos="%(rozw)s"/>""" \
+        return """<li class="question-piece" data-pos="%(rozw)s">""" \
             % element.attrib,\
             "</li>"
 
 
 class Luki(Exercise):
     INSTRUCTION = u"PrzeciÄgnij odpowiedzi i upuÅÄ w wybranym polu."
+
     def find_pieces(self, question):
         return question.xpath(".//luka")
 
     def solution_html(self, piece):
+        piece = deepcopy(piece)
+        piece.tail = None
         sub = EduModule()
         return sub.generate(piece)
-        # print piece.text
-        # return piece.text + ''.join(
-        #     [etree.tostring(n, encoding=unicode)
-        #      for n in piece])
 
     def handle_pytanie(self, element):
         qpre, qpost = super(Luki, self).handle_pytanie(element)
@@ -461,7 +529,7 @@ class Zastap(Luki):
         return question.xpath(".//zastap")
 
     def solution_html(self, piece):
-        return piece.attrib['rozw']
+        return piece.attrib.get('rozw', '')
 
     def handle_zastap(self, element):
         self.piece_counter += 1
@@ -474,7 +542,6 @@ class Przyporzadkuj(Exercise):
                    u"Kliknij numer odpowiedzi, przeciÄgnij i upuÅÄ w wybranym polu."]
 
     def get_instruction(self):
-        print self.options['handles']
         if not self.instruction_printed:
             self.instruction_printed = True
             return u'<span class="instruction">%s</span>' % self.INSTRUCTION[self.options['handles'] and 1 or 0]
@@ -507,6 +574,8 @@ class Przyporzadkuj(Exercise):
                 'data-target': lista.attrib['cel'],
                 'class': 'subject'
             }
+            if lista.attrib.get('krotkie'):
+                self.options = {'short': True}
             self.options = {'subject': True}
         else:
             attrs = {}
@@ -517,16 +586,29 @@ class Przyporzadkuj(Exercise):
         if self.options['subject']:
             self.piece_counter += 1
             if self.options['handles']:
-                return '<li><span data-solution="%s" data-no="%s" class="question-piece draggable handle add-li">%s</span>' % (element.attrib['rozw'], self.piece_counter, self.piece_counter), '</li>'
+                return (
+                    '<li><span data-solution="%s" data-no="%s" '
+                    'class="question-piece draggable handle add-li">%s</span>' % (
+                        element.attrib.get('rozw', ''),
+                        self.piece_counter,
+                        self.piece_counter),
+                    '</li>')
             else:
-                return '<li data-solution="%s" data-no="%s" class="question-piece draggable">' % (element.attrib['rozw'], self.piece_counter), '</li>'
+                extra_class = ""
+                if self.options['short']:
+                    extra_class += ' short'
+                return '<li data-solution="%s" data-no="%s" class="question-piece draggable%s">' % (
+                    element.attrib.get('rozw', ''),
+                    self.piece_counter, extra_class), '</li>'
 
         elif self.options['predicate']:
             if self.options['min']:
-                placeholders = u'<li class="placeholder"/>' * self.options['min']
+                placeholders = u'<li class="placeholder"></li>' * self.options['min']
             else:
-                placeholders = u'<li class="placeholder multiple"/>'
-            return '<li data-predicate="%(nazwa)s">' % element.attrib, '<ul class="subjects">' + placeholders + '</ul></li>'
+                placeholders = u'<li class="placeholder multiple"></li>'
+            return (
+                '<li data-predicate="%s">' % element.attrib.get('nazwa', ''),
+                '<ul class="subjects">' + placeholders + '</ul></li>')
 
         else:
             return super(Przyporzadkuj, self).handle_punkt(element)
@@ -552,6 +634,7 @@ class EduModuleFormat(Format):
 
     def __init__(self, wldoc, **kwargs):
         super(EduModuleFormat, self).__init__(wldoc, **kwargs)
+        self.materials_by_slug = None
 
     def build(self):
         # Sort materials by slug.
@@ -573,17 +656,33 @@ class EduModuleFormat(Format):
 
     def materials(self, slug):
         """Returns a list of pairs: (ext, iofile)."""
-        order = dict(reversed(k) for k in enumerate(self.PRIMARY_MATERIAL_FORMATS))
+        order = {pmf: i for (i, pmf) in enumerate(self.PRIMARY_MATERIAL_FORMATS)}
         mats = self.materials_by_slug.get(slug, {}).items()
         if not mats:
-            print "!! Material missing: '%s'" % slug
+            print ("!! Material missing: '%s'" % slug).encode('utf-8')
         return sorted(mats, key=lambda (x, y): order.get(x, x))
 
     def url_for_material(self, slug, fmt):
         return "%s.%s" % (slug, fmt)
 
+    # WTF: tutaj był błąd, ale nikomu to nie przeszkadzało?
+    def url_for_image(self, slug, fmt, width=None):
+        return self.url_for_material(slug, fmt)
+
+    def text_to_anchor(self, text):
+        return re.sub(r" +", " ", text)
+
+    def naglowek_to_anchor(self, naglowek):
+        return self.text_to_anchor(naglowek.text.strip())
+
+    def get_forma_url(self, forma):
+        return None
+
+    def get_help_url(self, naglowek):
+        return None
+
 
-def transform(wldoc, stylesheet='edumed', options=None, flags=None):
+def transform(wldoc, stylesheet='edumed', options=None, flags=None, verbose=None):
     """Transforms the WL document to XHTML.
 
     If output_filename is None, returns an XML,