verbose curriculum levels in model

[librarian.git] / librarian / pypdf.py
diff --git a/librarian/pypdf.py b/librarian/pypdf.py

index 6d6e0aa..256bc34 100644 (file)
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -9,30 +9,19 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import with_statement
  from copy import deepcopy
-import os
  import os.path
  import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
  import re
  import random
-from copy import deepcopy
-from subprocess import call, PIPE
  from urllib2 import urlopen
  
-from Texml.processor import process
  from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
  
-from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
-from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from xmlutils import Xmill, ifoption, tag_open_close
+from librarian import DCNS, get_resource, IOFile
  from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
  
  
  def escape(really):
@@ -40,10 +29,11 @@ def escape(really):
          def _wrap(*args, **kw):
              value = f(*args, **kw)
  
-            prefix = (u'<TeXML escape="%d">' % (really and 1 or 0))
+            prefix = (u'<TeXML escape="%d">' % (1 if really else 0))
              postfix = u'</TeXML>'
              if isinstance(value, list):
-                import pdb; pdb.set_trace()
+                import pdb
+                pdb.set_trace()
              if isinstance(value, tuple):
                  return prefix + value[0], value[1] + postfix
              else:
@@ -76,36 +66,36 @@ def mark_alien_characters(text):
  
  
  class EduModule(Xmill):
-    def __init__(self, options=None):
-        super(EduModule, self).__init__(options)
+    def __init__(self, options=None, state=None):
+        super(EduModule, self).__init__(options, state)
          self.activity_counter = 0
+        self.activity_last = None
          self.exercise_counter = 0
  
          def swap_endlines(txt):
              if self.options['strofa']:
                  txt = txt.replace("/\n", '<ctrl ch="\\"/>')
              return txt
+        self.register_text_filter(swap_endlines)
          self.register_text_filter(functions.substitute_entities)
          self.register_text_filter(mark_alien_characters)
-        self.register_text_filter(swap_endlines)
  
      def get_dc(self, element, dc_field, single=False):
          values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
          if single:
-            return values[0]
+            return values[0] if len(values) else ''
          return values
  
      def handle_rdf__RDF(self, _):
-        "skip metadata in generation"
+        """skip metadata in generation"""
          return
  
      @escape(True)
      def get_rightsinfo(self, element):
          rights_lic = self.get_dc(element, 'rights.license', True)
-        return u'<cmd name="rightsinfostr">' + \
-          (rights_lic and u'<opt>%s</opt>' % rights_lic or '') +\
-          u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) +\
-          u'</cmd>'
+        return u'<cmd name="rightsinfostr">' + (u'<opt>%s</opt>' % rights_lic if rights_lic else '') + \
+            u'<parm>%s</parm>' % self.get_dc(element, 'rights', True) + \
+            u'</cmd>'
  
      @escape(True)
      def get_authors(self, element, which=None):
@@ -116,95 +106,158 @@ class EduModule(Xmill):
                  dc.authors_expert
          else:
              authors = getattr(dc, "authors_%s" % which)
-        return u', '.join(author.readable() for author in authors)
+        return u', '.join(author.readable() for author in authors if author)
  
-    @escape(1)
+    @escape(True)
      def get_title(self, element):
          return self.get_dc(element, 'title', True)
  
+    @escape(True)
+    def get_description(self, element):
+        """Return the first dc:description value found via get_dc.
+
+        When the value is empty, a '!! no description' warning is printed
+        to stdout and the empty value is still returned.
+        """
+        description = self.get_dc(element, 'description', single=True)
+        if description:
+            return description
+        print '!! no description'
+        return description
+
+    @escape(True)
+    def get_curriculum(self, element):
+        """Build the TeXML fragment listing the curriculum entries.
+
+        Reads dc:subject.curriculum and dc:subject.curriculum.new; every
+        non-empty field becomes a group of "akap" paragraphs led by a
+        header line.  If the ``curriculum`` template tag cannot be imported,
+        the raw identifiers are listed in place of the expanded entries.
+        """
+        line_break = '<ctrl ch="\\"/>\n'
+        paragraph_gap = '\n<cmd name="vspace"><parm>.6em</parm></cmd>\n'
+        section_gap = '\n<cmd name="vspace"><parm>1em</parm></cmd>\n'
+        sections = []
+        for dc_tag, new in (('subject.curriculum', False), ('subject.curriculum.new', True)):
+            identifiers = self.get_dc(element, dc_tag)
+            if not identifiers:
+                continue
+            try:
+                from curriculum.templatetags.curriculum_tags import curriculum
+                curr_elements = curriculum(identifiers)
+            except ImportError:
+                # Template tag unavailable: fall back to the bare identifiers.
+                curr_elements = {'identifiers': identifiers}
+            if new:
+                paragraphs = ['Podstawa programowa 2017:']
+            else:
+                paragraphs = ['Podstawa programowa:']
+            if 'currset' not in curr_elements:
+                paragraphs.extend(identifiers)
+            else:
+                for (course, level), types in curr_elements['currset'].iteritems():
+                    entry = [u'%s, %s' % (course, level)]
+                    for curr_type, currs in types.iteritems():
+                        entry.append(curr_type)
+                        entry.extend(curr.title for curr in currs)
+                    paragraphs.append(line_break.join(entry))
+            sections.append(paragraph_gap.join(
+                '<cmd name="akap"><parm>%s</parm></cmd>' % text for text in paragraphs))
+        return section_gap.join(sections)
+
      def handle_utwor(self, element):
          lines = [
              u'''
-    <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
-        <TeXML escape="0">
-        \\documentclass[%s]{wl}
-        \\usepackage{style}''' % self.options['customization_str'],
-    self.options['has_cover'] and '\usepackage{makecover}',
-    (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
-    (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
-    (self.options['morefloats'] == 'none' and
-     u'''\\IfFileExists{morefloats.sty}{
-            \\usepackage{morefloats}
-        }{}'''),
-    u'''\\def\\authors{%s}''' % self.get_authors(element),
-    u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
-    u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
-    u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
-    
-    u'''\\author{\\authors}''',
-    u'''\\title{%s}''' % self.get_title(element),
-    u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
-    u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
-    u'</TeXML>']
+                <TeXML xmlns="http://getfo.sourceforge.net/texml/ns1">
+                <TeXML escape="0">
+                \\documentclass[%s]{wl}
+                \\usepackage{style}''' % self.options['customization_str'],
+            self.options['has_cover'] and '\usepackage{makecover}',
+            (self.options['morefloats'] == 'new' and '\usepackage[maxfloats=64]{morefloats}') or
+            (self.options['morefloats'] == 'old' and '\usepackage{morefloats}') or
+            (self.options['morefloats'] == 'none' and
+                u'''\\IfFileExists{morefloats.sty}{
+                \\usepackage{morefloats}
+                }{}'''),
+            u'''\\def\\authors{%s}''' % self.get_authors(element),
+            u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+            u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+            u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+            u'''\\def\\description{%s}''' % self.get_description(element),
+
+            u'''\\author{\\authors}''',
+            u'''\\title{%s}''' % self.get_title(element),
+            u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
+            u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
+            u'''\\def\\curriculum{%s}''' % self.get_curriculum(element),
+            u'</TeXML>'
+        ]
  
          return u"".join(filter(None, lines)), u'</TeXML>'
  
-
-    @escape(1)
+    @escape(True)
      def handle_powiesc(self, element):
          return u"""
      <env name="document">
      <cmd name="maketitle"/>
      """, """<cmd name="editorialsection" /></env>"""
  
-    @escape(1)
+    @escape(True)
      def handle_texcommand(self, element):
          cmd = functions.texcommand(element.tag)
          return u'<TeXML escape="1"><cmd name="%s"><parm>' % cmd, u'</parm></cmd></TeXML>'
  
      handle_akap = \
-    handle_akap = \
-    handle_akap_cd = \
-    handle_akap_cd = \
-    handle_akap_dialog = \
-    handle_akap_dialog = \
-    handle_autor_utworu = \
-    handle_dedykacja = \
-    handle_didaskalia = \
-    handle_didask_tekst = \
-    handle_dlugi_cytat = \
-    handle_dzielo_nadrzedne = \
-    handle_lista_osoba = \
-    handle_mat = \
-    handle_miejsce_czas = \
-    handle_motto = \
-    handle_motto_podpis = \
-    handle_naglowek_akt = \
-    handle_naglowek_czesc = \
-    handle_naglowek_listy = \
-    handle_naglowek_osoba = \
-    handle_naglowek_podrozdzial = \
-    handle_naglowek_podrozdzial = \
-    handle_naglowek_rozdzial = \
-    handle_naglowek_rozdzial = \
-    handle_naglowek_scena = \
-    handle_nazwa_utworu = \
-    handle_nota = \
-    handle_osoba = \
-    handle_pa = \
-    handle_pe = \
-    handle_podtytul = \
-    handle_poezja_cyt = \
-    handle_pr = \
-    handle_pt = \
-    handle_sekcja_asterysk = \
-    handle_sekcja_swiatlo = \
-    handle_separator_linia = \
-    handle_slowo_obce = \
-    handle_srodtytul = \
-    handle_tytul_dziela = \
-    handle_wyroznienie = \
-    handle_texcommand
+        handle_akap_cd = \
+        handle_akap_dialog = \
+        handle_autor_utworu = \
+        handle_dedykacja = \
+        handle_didaskalia = \
+        handle_didask_tekst = \
+        handle_dlugi_cytat = \
+        handle_dzielo_nadrzedne = \
+        handle_lista_osoba = \
+        handle_mat = \
+        handle_miejsce_czas = \
+        handle_motto = \
+        handle_motto_podpis = \
+        handle_naglowek_akt = \
+        handle_naglowek_czesc = \
+        handle_naglowek_listy = \
+        handle_naglowek_osoba = \
+        handle_naglowek_scena = \
+        handle_nazwa_utworu = \
+        handle_nota = \
+        handle_osoba = \
+        handle_pa = \
+        handle_pe = \
+        handle_podtytul = \
+        handle_poezja_cyt = \
+        handle_pr = \
+        handle_pt = \
+        handle_sekcja_asterysk = \
+        handle_sekcja_swiatlo = \
+        handle_separator_linia = \
+        handle_slowo_obce = \
+        handle_srodtytul = \
+        handle_tytul_dziela = \
+        handle_wyroznienie = \
+        handle_dywiz = \
+        handle_texcommand
+
+    def handle_naglowek_rozdzial(self, element):
+        """Render a chapter heading, updating state['mute'] outside teacher mode.
+
+        When options['teacher'] is false, a heading whose leading text starts
+        with one of the listed section names clears state['mute'] and is
+        rendered; any other heading sets state['mute'] and returns None.
+        When options['teacher'] is true the heading is always rendered
+        through handle_texcommand.
+        """
+        if not self.options['teacher']:
+            # element.text is None when the heading has no leading text
+            # (e.g. it opens with a child element); treat that as empty
+            # rather than failing on None.startswith.
+            heading = element.text or u''
+            if heading.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
+                self.state['mute'] = False
+            else:
+                self.state['mute'] = True
+                return None
+        return self.handle_texcommand(element)
+    # NOTE(review): 'unmuter' is presumably read by the Xmill dispatcher so
+    # this handler still runs while output is muted - confirm in xmlutils.
+    handle_naglowek_rozdzial.unmuter = True
+
+    def handle_naglowek_podrozdzial(self, element):
+        """Render a subchapter heading and reset the activity counter.
+
+        Outside teacher mode, a heading starting with u'Dla ucznia' clears
+        state['mute'] and one starting with u'Dla nauczyciela' sets it; both
+        return None, as does any other heading seen while state['mute'] is
+        set.  In every remaining case the heading is rendered through
+        handle_texcommand.
+        """
+        self.activity_counter = 0
+        if not self.options['teacher']:
+            # element.text is None when the heading has no leading text;
+            # fall back to an empty string so startswith cannot fail.
+            heading = element.text or u''
+            if heading.startswith(u'Dla ucznia'):
+                self.state['mute'] = False
+                return None
+            elif heading.startswith(u'Dla nauczyciela'):
+                self.state['mute'] = True
+                return None
+            elif self.state['mute']:
+                return None
+        return self.handle_texcommand(element)
+    # NOTE(review): 'unmuter' is presumably read by the Xmill dispatcher so
+    # this handler still runs while output is muted - confirm in xmlutils.
+    handle_naglowek_podrozdzial.unmuter = True
+
+    def handle_uwaga(self, _e):
+        """Return None for <uwaga> elements (argument unused).
+
+        NOTE(review): presumably this drops the element from the output, as
+        handle_rdf__RDF does for metadata - confirm against Xmill.
+        """
+        return None
+
+    def handle_extra(self, _e):
+        """Return None for <extra> elements (argument unused).
+
+        NOTE(review): presumably this drops the element from the output, as
+        handle_rdf__RDF does for metadata - confirm against Xmill.
+        """
+        return None
+
+    def handle_nbsp(self, _e):
+        """Return a TeXML <spec cat="tilde"/> for <nbsp> (argument unused).
+
+        NOTE(review): the handler name suggests this is meant to produce a
+        TeX non-breaking space - confirm against the TeXML specification.
+        """
+        return '<spec cat="tilde" />'
  
      _handle_strofa = cmd("strofa")
  
@@ -219,18 +272,24 @@ class EduModule(Xmill):
              'activity_counter': self.activity_counter,
              'sub_gen': True,
          }
-        submill = EduModule(self.options)
+        submill = EduModule(self.options, self.state)
  
-        opis = submill.generate(element.xpath('opis')[0])
+        if element.xpath('opis'):
+            opis = submill.generate(element.xpath('opis')[0])
+        else:
+            opis = ''
  
          n = element.xpath('wskazowki')
-        if n: wskazowki = submill.generate(n[0])
-
-        else: wskazowki = ''
+        if n:
+            wskazowki = submill.generate(n[0])
+        else:
+            wskazowki = ''
          n = element.xpath('pomoce')
  
-        if n: pomoce = submill.generate(n[0])
-        else: pomoce = ''
+        if n:
+            pomoce = submill.generate(n[0])
+        else:
+            pomoce = ''
  
          forma = ''.join(element.xpath('forma/text()'))
  
@@ -238,9 +297,16 @@ class EduModule(Xmill):
  
          counter = self.activity_counter
  
+        if element.getnext().tag == 'aktywnosc' or (len(self.activity_last) and self.activity_last.getnext() == element):
+            counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
+        else:
+            counter_tex = ''
+
+        self.activity_last = element
+
          return u"""
  <cmd name="noindent" />
-<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
+%(counter_tex)s
  <cmd name="activityinfo"><parm>
   <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
   <cmd name="activityform"><parm>%(forma)s</parm></cmd>
@@ -266,25 +332,28 @@ class EduModule(Xmill):
      def handle_forma(self, *_):
          return
  
-    def handle_lista(self, element, attrs={}):
-        if not element.findall("punkt"):
-            return None
+    def handle_lista(self, element, attrs=None):
          ltype = element.attrib.get('typ', 'punkt')
+        if not element.findall("punkt"):
+            if ltype == 'czytelnia':
+                return 'W przygotowaniu.'
+            else:
+                return None
          if ltype == 'slowniczek':
              surl = element.attrib.get('src', None)
              if surl is None:
                  # print '** missing src on <slowniczek>, setting default'
-                surl = 'http://edukacjamedialna.edu.pl/slowniczek'
-            sxml = None
-            if surl:
-                sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
-            self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
-
-        listcmd = {'num': 'enumerate',
-               'punkt': 'itemize',
-               'alfa': 'itemize',
-               'slowniczek': 'itemize',
-               'czytelnia': 'itemize'}[ltype]
+                surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
+            sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
+            self.options = {'slowniczek': True, 'slowniczek_xml': sxml}
+
+        listcmd = {
+            'num': 'enumerate',
+            'punkt': 'itemize',
+            'alfa': 'itemize',
+            'slowniczek': 'itemize',
+            'czytelnia': 'itemize'
+        }[ltype]
  
          return u'<env name="%s">' % listcmd, u'</env>'
  
@@ -303,10 +372,10 @@ class EduModule(Xmill):
  
          typ = element.attrib['typ']
          self.exercise_counter += 1
-        if not typ in exercise_handlers:
+        if typ not in exercise_handlers:
              return '(no handler)'
          self.options = {'exercise_counter': self.exercise_counter}
-        handler = exercise_handlers[typ](self.options)
+        handler = exercise_handlers[typ](self.options, self.state)
          return handler.generate(element)
  
      # XXX this is copied from pyhtml.py, except for return and
@@ -319,11 +388,14 @@ class EduModule(Xmill):
          if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
              sxml = self.options['slowniczek_xml']
              assert element.text != ''
-            defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
+            if "'" in (element.text or ''):
+                defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
+            else:
+                defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
              if defloc:
                  definiens = defloc[0].getnext()
                  if definiens.tag == 'definiens':
-                    subgen = EduModule(self.options)
+                    subgen = EduModule(self.options, self.state)
                      definiens_s = subgen.generate(definiens)
  
          return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
@@ -342,20 +414,19 @@ class EduModule(Xmill):
                  max_col = len(ks)
          self.options = {'columnts': max_col}
          # styling:
-                #        has_frames = int(element.attrib.get("ramki", "0"))
-                #        if has_frames: frames_c = "framed"
-                #        else: frames_c = ""
-                #        return u"""<table class="%s">""" % frames_c, u"</table>"
+        #     has_frames = int(element.attrib.get("ramki", "0"))
+        #     if has_frames: frames_c = "framed"
+        #     else: frames_c = ""
+        #     return u"""<table class="%s">""" % frames_c, u"</table>"
          return u'''
  <cmd name="begin"><parm>tabular</parm><parm>%s</parm></cmd>
-    ''' % ('l' * max_col), \
-    u'''<cmd name="end"><parm>tabular</parm></cmd>'''
+    ''' % ('l' * max_col), u'''<cmd name="end"><parm>tabular</parm></cmd>'''
  
-    @escape(1)
+    @escape(True)
      def handle_wiersz(self, element):
          return u"", u'<ctrl ch="\\"/>'
  
-    @escape(1)
+    @escape(True)
      def handle_kol(self, element):
          if element.getnext() is not None:
              return u"", u'<spec cat="align" />'
@@ -373,8 +444,9 @@ class EduModule(Xmill):
  
      def handle_obraz(self, element):
          frmt = self.options['format']
-        name = element.attrib['nazwa'].strip()
+        name = element.attrib.get('nazwa', '').strip()
          image = frmt.get_image(name.strip())
+        name = image.get_filename().rsplit('/', 1)[-1]
          img_path = "obraz/%s" % name.replace("_", "")
          frmt.attachments[img_path] = image
          return cmd("obraz", parms=[img_path])(self)
@@ -389,8 +461,7 @@ class EduModule(Xmill):
              print '!! unknown <video> url scheme:', url
              return
          name = m.group(1)
-        thumb = IOFile.from_string(urlopen
-            ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
+        thumb = IOFile.from_string(urlopen("http://img.youtube.com/vi/%s/0.jpg" % name).read())
          img_path = "video/%s.jpg" % name.replace("_", "")
          self.options['format'].attachments[img_path] = thumb
          canon_url = "https://www.youtube.com/watch?v=%s" % name
@@ -401,6 +472,7 @@ class Exercise(EduModule):
      def __init__(self, *args, **kw):
          self.question_counter = 0
          super(Exercise, self).__init__(*args, **kw)
+        self.piece_counter = None
  
      handle_rozw_kom = ifoption(teacher=True)(cmd('akap'))
  
@@ -422,7 +494,7 @@ class Exercise(EduModule):
          # Add a single <pytanie> tag if it's not there
          if not element.xpath(".//pytanie"):
              qpre, qpost = self.handle_pytanie(element)
-            pre = pre + qpre
+            pre += qpre
              post = qpost + post
          return pre, post
  
@@ -458,7 +530,6 @@ class Exercise(EduModule):
              return self.solution_header() + etree.tostring(par)
  
  
-
  class Wybor(Exercise):
      def handle_cwiczenie(self, element):
          pre, post = super(Wybor, self).handle_cwiczenie(element)
@@ -467,23 +538,19 @@ class Wybor(Exercise):
          if not pytania:
              pytania = [element]
          for p in pytania:
-            solutions = re.split(r"[, ]+", p.attrib['rozw'])
+            solutions = p.xpath(".//punkt[@rozw='prawda']")
              if len(solutions) != 1:
                  is_single_choice = False
                  break
-            choices = p.xpath(".//*[@nazwa]")
-            uniq = set()
-            for n in choices: uniq.add(n.attrib['nazwa'])
-            if len(choices) != len(uniq):
-                is_single_choice = False
-                break
  
          self.options = {'single': is_single_choice}
          return pre, post
  
      def handle_punkt(self, element):
-        if self.options['exercise'] and element.attrib.get('nazwa', None):
+        if self.options['exercise'] and element.attrib.get('rozw', None):
              cmd = 'radio' if self.options['single'] else 'checkbox'
+            if self.options['teacher'] and element.attrib['rozw'] == 'prawda':
+                cmd += 'checked'
              return u'<cmd name="%s"/>' % cmd, ''
          else:
              return super(Wybor, self).handle_punkt(element)
@@ -550,7 +617,7 @@ class Zastap(Luki):
          return question.xpath(".//zastap")
  
      def solution(self, piece):
-        return piece.attrib['rozw']
+        return piece.attrib.get('rozw', '')
  
      def list_header(self):
          return u"Elementy do wstawienia"
@@ -573,18 +640,19 @@ class PrawdaFalsz(Exercise):
          return pre, post
  
  
-
  def fix_lists(tree):
      lists = tree.xpath(".//lista")
      for l in lists:
          if l.text:
              p = l.getprevious()
              if p is not None:
-                if p.tail is None: p.tail = ''
+                if p.tail is None:
+                    p.tail = ''
                  p.tail += l.text
              else:
                  p = l.getparent()
-                if p.text is None: p.text = ''
+                if p.text is None:
+                    p.text = ''
                  p.text += l.text
              l.text = ''
      return tree
@@ -594,6 +662,9 @@ class EduModulePDFFormat(PDFFormat):
      style = get_resource('res/styles/edumed/pdf/edumed.sty')
  
      def get_texml(self):
+        substitute_hyphens(self.wldoc.edoc)
+        fix_hanging(self.wldoc.edoc)
+
          self.attachments = {}
          edumod = EduModule({
              "wldoc": self.wldoc,
@@ -614,4 +685,3 @@ class EduModulePDFFormat(PDFFormat):
  
      def get_image(self, name):
          return self.wldoc.source.attachments[name]
-