slugify anchors, fix some minor html issues
[librarian.git] / librarian / pypdf.py
index b56d1a6..d89d4af 100644 (file)
@@ -9,30 +9,20 @@ Creates one big XML from the book and its children, converts it to LaTeX
 with TeXML, then runs it by XeLaTeX.
 
 """
-from __future__ import with_statement
 from copy import deepcopy
-import os
 import os.path
 import shutil
-from StringIO import StringIO
-from tempfile import mkdtemp, NamedTemporaryFile
 import re
 import random
-from copy import deepcopy
-from subprocess import call, PIPE
 from urllib2 import urlopen
 
-from Texml.processor import process
 from lxml import etree
-from lxml.etree import XMLSyntaxError, XSLTApplyError
 
 from xmlutils import Xmill, tag, tagged, ifoption, tag_open_close
 from librarian.dcparser import Person
-from librarian.parser import WLDocument
-from librarian import ParseError, DCNS, get_resource, IOFile, Format
+from librarian import DCNS, get_resource, IOFile
 from librarian import functions
-from pdf import PDFFormat
-
+from pdf import PDFFormat, substitute_hyphens, fix_hanging
 
 
 def escape(really):
@@ -85,9 +75,9 @@ class EduModule(Xmill):
             if self.options['strofa']:
                 txt = txt.replace("/\n", '<ctrl ch="\\"/>')
             return txt
+        self.register_text_filter(swap_endlines)
         self.register_text_filter(functions.substitute_entities)
         self.register_text_filter(mark_alien_characters)
-        self.register_text_filter(swap_endlines)
 
     def get_dc(self, element, dc_field, single=False):
         values = map(lambda t: t.text, element.xpath("//dc:%s" % dc_field, namespaces={'dc': DCNS.uri}))
@@ -108,11 +98,15 @@ class EduModule(Xmill):
           u'</cmd>'
 
     @escape(True)
-    def get_authors(self, element):
-        authors = self.get_dc(element, 'creator.expert') + \
-          self.get_dc(element, 'creator.scenario') + \
-          self.get_dc(element, 'creator.textbook')
-        return u', '.join(authors)
+    def get_authors(self, element, which=None):  # which: None = all roles, else suffix of a book_info.authors_* attribute
+        dc = self.options['wldoc'].book_info  # `element` is not consulted by the new version
+        if which is None:
+            authors = dc.authors_textbook + \
+                dc.authors_scenario + \
+                dc.authors_expert  # order is textbook, scenario, expert; the removed code listed expert first
+        else:
+            authors = getattr(dc, "authors_%s" % which)  # e.g. which='expert' reads dc.authors_expert
+        return u', '.join(author.readable() for author in authors)  # comma-joined result of each author's readable()
 
     @escape(1)
     def get_title(self, element):
@@ -133,9 +127,13 @@ class EduModule(Xmill):
             \\usepackage{morefloats}
         }{}'''),
     u'''\\def\\authors{%s}''' % self.get_authors(element),
+    u'''\\def\\authorsexpert{%s}''' % self.get_authors(element, 'expert'),
+    u'''\\def\\authorsscenario{%s}''' % self.get_authors(element, 'scenario'),
+    u'''\\def\\authorstextbook{%s}''' % self.get_authors(element, 'textbook'),
+    
     u'''\\author{\\authors}''',
     u'''\\title{%s}''' % self.get_title(element),
-    u'''\\def\\bookurl{%s}''' % self.get_dc(element, 'identifier.url', True),
+    u'''\\def\\bookurl{%s}''' % self.options['wldoc'].book_info.url.canonical(),
     u'''\\def\\rightsinfo{%s}''' % self.get_rightsinfo(element),
     u'</TeXML>']
 
@@ -147,7 +145,7 @@ class EduModule(Xmill):
         return u"""
     <env name="document">
     <cmd name="maketitle"/>
-    """, """</env>"""
+    """, """<cmd name="editorialsection" /></env>"""
 
     @escape(1)
     def handle_texcommand(self, element):
@@ -196,8 +194,17 @@ class EduModule(Xmill):
     handle_srodtytul = \
     handle_tytul_dziela = \
     handle_wyroznienie = \
+    handle_dywiz = \
     handle_texcommand
 
+    def handle_uwaga(self, _e):  # handler for <uwaga>; the element argument is ignored
+        return None  # presumably makes Xmill emit nothing for this element - TODO confirm in xmlutils
+    def handle_extra(self, _e):  # handler for <extra>; the element argument is ignored
+        return None  # presumably makes Xmill emit nothing for this element - TODO confirm in xmlutils
+
+    def handle_nbsp(self, _e):  # handler for <nbsp>; the element argument is ignored
+        return '<spec cat="tilde" />'  # TeXML special character of category "tilde" (presumably rendered as LaTeX '~' - confirm)
+
     _handle_strofa = cmd("strofa")
 
     def handle_strofa(self, element):
@@ -231,7 +238,7 @@ class EduModule(Xmill):
         counter = self.activity_counter
 
         return u"""
-
+<cmd name="noindent" />
 <cmd name="activitycounter"><parm>%(counter)d.</parm></cmd>
 <cmd name="activityinfo"><parm>
  <cmd name="activitytime"><parm>%(czas)s</parm></cmd>
@@ -259,17 +266,20 @@ class EduModule(Xmill):
         return
 
     def handle_lista(self, element, attrs={}):
-        if not element.findall("punkt"):
-            return None
         ltype = element.attrib.get('typ', 'punkt')
+        if not element.findall("punkt"):
+            if ltype == 'czytelnia':
+                return 'W przygotowaniu.'
+            else:
+                return None
         if ltype == 'slowniczek':
             surl = element.attrib.get('src', None)
             if surl is None:
                 # print '** missing src on <slowniczek>, setting default'
-                surl = 'http://edukacjamedialna.edu.pl/slowniczek'
+                surl = 'http://edukacjamedialna.edu.pl/lekcje/slowniczek/'
             sxml = None
             if surl:
-                sxml = etree.fromstring(self.options['provider'].by_uri(surl).get_string())
+                sxml = etree.fromstring(self.options['wldoc'].provider.by_uri(surl).get_string())
             self.options = {'slowniczek': True, 'slowniczek_xml': sxml }
 
         listcmd = {'num': 'enumerate',
@@ -361,7 +371,7 @@ class EduModule(Xmill):
             else:
                 return cmd('href', parms=[element.attrib['url']])(self, element)
         else:
-            return cmd('em')(self, element)
+            return cmd('emph')(self, element)
 
     def handle_obraz(self, element):
         frmt = self.options['format']
@@ -583,10 +593,15 @@ def fix_lists(tree):
 
 
 class EduModulePDFFormat(PDFFormat):
+    style = get_resource('res/styles/edumed/pdf/edumed.sty')
+
     def get_texml(self):
+        substitute_hyphens(self.wldoc.edoc)
+        fix_hanging(self.wldoc.edoc)
+
         self.attachments = {}
         edumod = EduModule({
-            'provider': self.wldoc.provider, 
+            "wldoc": self.wldoc,
             "format": self,
             "teacher": self.customization.get('teacher'),
         })
@@ -597,6 +612,7 @@ class EduModulePDFFormat(PDFFormat):
 
     def get_tex_dir(self):
         temp = super(EduModulePDFFormat, self).get_tex_dir()
+        shutil.copy(get_resource('res/styles/edumed/logo.png'), temp)  # place the edumed logo in the TeX directory (presumably used by the style - confirm)
         for name, iofile in self.attachments.items():
             iofile.save_as(os.path.join(temp, name))
         return temp