ugly fix (error reporting)

[librarian.git] / librarian / pypdf.py
diff --git a/librarian/pypdf.py b/librarian/pypdf.py

index 387f647..bb2881f 100644 (file)
--- a/librarian/pypdf.py
+++ b/librarian/pypdf.py
@@ -9,29 +9,20 @@ Creates one big XML from the book and its children, converts it to LaTeX
  with TeXML, then runs it by XeLaTeX.
  
  """
-from __future__ import with_statement
  from copy import deepcopy
-import os
  import os.path
  import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
  import re
  import random
-from copy import deepcopy
-from subprocess import call, PIPE
+from urllib2 import urlopen
  
-from Texml.processor import process
  from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
  
  from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
  from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from librarian import DCNS, get_resource, IOFile
  from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
  
  
  def escape(really):
@@ -52,7 +43,7 @@ def escape(really):
  
  
  def cmd(name, parms=None):
-    def wrap(self, element):
+    def wrap(self, element=None):
          pre, post = tag_open_close('cmd', name=name)
  
          if parms:
@@ -60,9 +51,12 @@ def cmd(name, parms=None):
                  e = etree.Element("parm")
                  e.text = parm
                  pre += etree.tostring(e)
-        pre += "<parm>"
-        post = "</parm>" + post
-        return pre, post
+        if element is not None:
+            pre += "<parm>"
+            post = "</parm>" + post
+            return pre, post
+        else:
+            return pre + post
      return wrap
  
  
@@ -72,18 +66,19 @@ def mark_alien_characters(text):
  
  
  class EduModule(Xmill):
-    def __init__(self, options=None):
-        super(EduModule, self).__init__(options)
+    def __init__(self, options=None, state=None):
+        super(EduModule, self).__init__(options, state)
          self.activity_counter = 0
+        self.activity_last = None
          self.exercise_counter = 0
  
          def swap_endlines(txt):
              if self.options['strofa']:
                  txt = txt.replace("/\n", '<ctrl ch="\\"/>')
              return txt
+        self.register_text_filter(swap_endlines)
          self.register_text_filter(functions.substitute_entities)
          self.register_text_filter(mark_alien_characters)
-        self.register_text_filter(swap_endlines)
  
      def get_dc(self, element, dc_field, single=False):
          values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
@@ -104,11 +99,15 @@ class EduModule(Xmill):
            u'</cmd>'
  
      @escape(True)
-    def get_authors(self, element):
-        authors = self.get_dc(element, 'creator.expert') + \
-          self.get_dc(element, 'creator.scenario') + \
-          self.get_dc(element, 'creator.textbook')
-        return u', '.join(authors)
+    def get_authors(self, element, which=None):
+        dc = self.options['wldoc'].book_info
+        if which is None:
+            authors = dc.authors_textbook + \
+                dc.authors_scenario + \
+                dc.authors_expert
+        else:
+            authors = getattr(dc, "authors_%s" % which)
+        return u', '.join(author.readable() for author in authors if author)
  
      @escape(1)
      def get_title(self, element):
@@ -129,9 +128,13 @@ class EduModule(Xmill):
              \\usepackage{morefloats}
          }{}'''),
      u'''\\def\\authors{%s}''' % self.get_authors(element),
+    u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+    u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+    u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+    
      u'''\\author{\\authors}''',
      u'''\\title{%s}''' % self.get_title(element),
-    u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
+    u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
      u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
      u'</TeXML>']
  
@@ -143,7 +146,7 @@ class EduModule(Xmill):
          return u"""
      <env name="document">
      <cmd name="maketitle"/>
-    """, """</env>"""
+    """, """<cmd name="editorialsection" /></env>"""
  
      @escape(1)
      def handle_texcommand(self, element):
@@ -171,10 +174,6 @@ class EduModule(Xmill):
      handle_naglowek_czesc = \
      handle_naglowek_listy = \
      handle_naglowek_osoba = \
-    handle_naglowek_podrozdzial = \
-    handle_naglowek_podrozdzial = \
-    handle_naglowek_rozdzial = \
-    handle_naglowek_rozdzial = \
      handle_naglowek_scena = \
      handle_nazwa_utworu = \
      handle_nota = \
@@ -192,8 +191,41 @@ class EduModule(Xmill):
      handle_srodtytul = \
      handle_tytul_dziela = \
      handle_wyroznienie = \
+    handle_dywiz = \
      handle_texcommand
  
+    def handle_naglowek_rozdzial(self, element):
+        if not self.options['teacher']:
+            if element.text.startswith((u'Wiedza', u'Zadania', u'Słowniczek', u'Dla ucznia')):
+                self.state['mute'] = False
+            else:
+                self.state['mute'] = True
+                return None
+        return self.handle_texcommand(element)
+    handle_naglowek_rozdzial.unmuter = True
+
+    def handle_naglowek_podrozdzial(self, element):
+        self.activity_counter = 0
+        if not self.options['teacher']:
+            if element.text.startswith(u'Dla ucznia'):
+                self.state['mute'] = False
+                return None
+            elif element.text.startswith(u'Dla nauczyciela'):
+                self.state['mute'] = True
+                return None
+            elif self.state['mute']:
+                return None
+        return self.handle_texcommand(element)
+    handle_naglowek_podrozdzial.unmuter = True
+
+    def handle_uwaga(self, _e):
+        return None
+    def handle_extra(self, _e):
+        return None
+
+    def handle_nbsp(self, _e):
+        return '<spec cat="tilde" />'
+
      _handle_strofa = cmd("strofa")
  
      def handle_strofa(self, element):
@@ -207,9 +239,12 @@ class EduModule(Xmill):
              'activity_counter': self.activity_counter,
              'sub_gen': True,
          }
-        submill = EduModule(self.options)
+        submill = EduModule(self.options, self.state)
  
-        opis = submill.generate(element.xpath('opis')[0])
+        if element.xpath('opis'):
+            opis = submill.generate(element.xpath('opis')[0])
+        else:
+            opis = ''
  
          n = element.xpath('wskazowki')
          if n: wskazowki = submill.generate(n[0])
@@ -226,9 +261,16 @@ class EduModule(Xmill):
  
          counter = self.activity_counter
  
-        return u"""
+        if element.getnext().tag == 'aktywnosc' or (self.activity_last and self.activity_last.getnext() == element):
+            counter_tex = """<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>""" % locals()
+        else:
+            counter_tex = ''
+
+        self.activity_last = element
  
-<cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
+        return u"""
+<cmd name="noindent" />
+%(counter_tex)s
  <cmd name="activityinfo"><parm>
   <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
   <cmd name="activityform"><parm>%(forma)s</parm></cmd>
@@ -255,14 +297,18 @@ class EduModule(Xmill):
          return
  
      def handle_lista(self, element, attrs={}):
-        if not element.findall("punkt"):
-            return None
          ltype = element.attrib.get('typ', 'punkt')
+        if not element.findall("punkt"):
+            if ltype == 'czytelnia':
+                return 'W przygotowaniu.'
+            else:
+                return None
          if ltype == 'slowniczek':
-            surl = element.attrib.get('href', None)
-            sxml = None
-            if surl:
-                sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
+            surl = element.attrib.get('src', None)
+            if surl is None:
+                # print '** missing src on <slowniczek>, setting default'
+                surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
+            sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
              self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
  
          listcmd = {'num': 'enumerate',
@@ -291,7 +337,7 @@ class EduModule(Xmill):
          if not typ in exercise_handlers:
              return '(no handler)'
          self.options = {'exercise_counter': self.exercise_counter}
-        handler = exercise_handlers[typ](self.options)
+        handler = exercise_handlers[typ](self.options, self.state)
          return handler.generate(element)
  
      # XXX this is copied from pyhtml.py, except for return and
@@ -301,14 +347,17 @@ class EduModule(Xmill):
          definiens_s = ''
  
          # let's pull definiens from another document
-        if self.options['slowniczek_xml'] and (not nxt or nxt.tag != 'definiens'):
+        if self.options['slowniczek_xml'] is not None and (nxt is None or nxt.tag != 'definiens'):
              sxml = self.options['slowniczek_xml']
              assert element.text != ''
-            defloc = sxml.xpath("//definiendum[text()='%s']" % element.text)
+            if "'" in (element.text or ''):
+                defloc = sxml.xpath("//definiendum[text()=\"%s\"]" % (element.text or '').strip())
+            else:
+                defloc = sxml.xpath("//definiendum[text()='%s']" % (element.text or '').strip())
              if defloc:
                  definiens = defloc[0].getnext()
                  if definiens.tag == 'definiens':
-                    subgen = EduModule(self.options)
+                    subgen = EduModule(self.options, self.state)
                      definiens_s = subgen.generate(definiens)
  
          return u'<cmd name="textbf"><parm>', u"</parm></cmd>: " + definiens_s
@@ -348,9 +397,39 @@ class EduModule(Xmill):
  
      def handle_link(self, element):
          if element.attrib.get('url'):
-            return cmd('href', parms=[element.attrib['url']])(self, element)
+            url = element.attrib.get('url')
+            if url == element.text:
+                return cmd('url')(self, element)
+            else:
+                return cmd('href', parms=[element.attrib['url']])(self, element)
          else:
-            return cmd('em')(self, element)
+            return cmd('emph')(self, element)
+
+    def handle_obraz(self, element):
+        frmt = self.options['format']
+        name = element.attrib.get('nazwa', '').strip()
+        image = frmt.get_image(name.strip())
+        name = image.get_filename().rsplit('/', 1)[-1]
+        img_path = "obraz/%s" % name.replace("_", "")
+        frmt.attachments[img_path] = image
+        return cmd("obraz", parms=[img_path])(self)
+
+    def handle_video(self, element):
+        url = element.attrib.get('url')
+        if not url:
+            print '!! <video> missing url'
+            return
+        m = re.match(r'(?:https?://)?(?:www.)?youtube.com/watch\?(?:.*&)?v=([^&]+)(?:$|&)', url)
+        if not m:
+            print '!! unknown <video> url scheme:', url
+            return
+        name = m.group(1)
+        thumb = IOFile.from_string(urlopen
+            ("http://img.youtube.com/vi/%s/0.jpg" % name).read())
+        img_path = "video/%s.jpg" % name.replace("_", "")
+        self.options['format'].attachments[img_path] = thumb
+        canon_url = "https://www.youtube.com/watch?v=%s" % name
+        return cmd("video", parms=[img_path, canon_url])(self)
  
  
  class Exercise(EduModule):
@@ -423,13 +502,13 @@ class Wybor(Exercise):
          if not pytania:
              pytania = [element]
          for p in pytania:
-            solutions = re.split(r"[, ]+", p.attrib['rozw'])
+            solutions = re.split(r"[, ]+", p.attrib.get('rozw', ''))
              if len(solutions) != 1:
                  is_single_choice = False
                  break
              choices = p.xpath(".//*[@nazwa]")
              uniq = set()
-            for n in choices: uniq.add(n.attrib['nazwa'])
+            for n in choices: uniq.add(n.attrib.get('nazwa', ''))
              if len(choices) != len(uniq):
                  is_single_choice = False
                  break
@@ -506,7 +585,7 @@ class Zastap(Luki):
          return question.xpath(".//zastap")
  
      def solution(self, piece):
-        return piece.attrib['rozw']
+        return piece.attrib.get('rozw', '')
  
      def list_header(self):
          return u"Elementy do wstawienia"
@@ -547,9 +626,30 @@ def fix_lists(tree):
  
  
  class EduModulePDFFormat(PDFFormat):
+    style = get_resource('res/styles/edumed/pdf/edumed.sty')
+
      def get_texml(self):
-        edumod = EduModule({"teacher": self.customization.get('teacher')})
+        substitute_hyphens(self.wldoc.edoc)
+        fix_hanging(self.wldoc.edoc)
+
+        self.attachments = {}
+        edumod = EduModule({
+            "wldoc": self.wldoc,
+            "format": self,
+            "teacher": self.customization.get('teacher'),
+        })
          texml = edumod.generate(fix_lists(self.wldoc.edoc.getroot())).encode('utf-8')
  
          open("/tmp/texml.xml", "w").write(texml)
          return texml
+
+    def get_tex_dir(self):
+        temp = super(EduModulePDFFormat, self).get_tex_dir()
+        shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)
+        for name, iofile in self.attachments.items():
+            iofile.save_as(os.path.join(temp, name))
+        return temp
+
+    def get_image(self, name):
+        return self.wldoc.source.attachments[name]
+