minor cleanup

[redakcja.git] / apps / catalogue / models / book.py
diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py

index 38d8499..4c35e68 100755 (executable)
--- a/apps/catalogue/models/book.py
+++ b/apps/catalogue/models/book.py
@@ -32,6 +32,8 @@ class Book(models.Model):
          'self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
      parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)
  
+    for_cybernauts = models.BooleanField(_('for Cybernauts'), default=False)
+
      # Cache
      _short_html = models.TextField(null=True, blank=True, editable=False)
      _single = models.NullBooleanField(editable=False, db_index=True)
@@ -135,7 +137,7 @@ class Book(models.Model):
          return instance
  
      def make_chunk_slug(self, proposed):
-        """ 
+        """
              Finds a chunk slug not yet used in the book.
          """
          slugs = set(c.slug for c in self)
@@ -198,10 +200,10 @@ class Book(models.Model):
  
          # and move the gallery starts
          if gm.was_merged:
-                for chunk in self[len(self) - len_other:]:
-                        old_start = chunk.gallery_start or 1
-                        chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
-                        chunk.save()
+            for chunk in self[len(self) - len_other:]:
+                old_start = chunk.gallery_start or 1
+                chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
+                chunk.save()
  
          other.delete()
  
@@ -244,7 +246,7 @@ class Book(models.Model):
      def assert_publishable(self):
          assert self.chunk_set.exists(), _('No chunks in the book.')
          try:
-            changes = self.get_current_changes(publishable=True)
+            changes = self.get_current_changes()
          except self.NoTextError:
              raise AssertionError(_('Not all chunks have publishable revisions.'))
  
@@ -252,6 +254,10 @@ class Book(models.Model):
  
          try:
              bi = self.wldocument(changes=changes, strict=True).book_info
+            if not bi.audience:
+                raise ValidationError('No audience specified')
+            if not bi.type:
+                raise ValidationError('No type specified')
          except ParseError, e:
              raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
          except NoDublinCore:
@@ -313,14 +319,14 @@ class Book(models.Model):
  
      def book_info(self, publishable=True):
          try:
-            book_xml = self.materialize(publishable=publishable)
+            book_xml = self.wl1_xml(publishable=publishable)
          except self.NoTextError:
              pass
          else:
              from librarian.dcparser import BookInfo
              from librarian import NoDublinCore, ParseError, ValidationError
              try:
-                return BookInfo.from_string(book_xml.encode('utf-8'))
+                return BookInfo.from_string(book_xml)
              except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
                  return None
  
@@ -382,7 +388,7 @@ class Book(models.Model):
          return changes
  
      def materialize(self, publishable=False, changes=None):
-        """ 
+        """
              Get full text of the document compiled from chunks.
              Takes the current versions of all texts
              or versions most recently tagged for publishing,
@@ -397,21 +403,97 @@ class Book(models.Model):
          from librarian.parser import WLDocument
  
          return WLDocument.from_string(
-                self.materialize(publishable=publishable, changes=changes),
+                self.wl1_xml(publishable=publishable, changes=changes),
                  provider=RedakcjaDocProvider(publishable=publishable),
                  parse_dublincore=parse_dublincore,
                  strict=strict)
  
-    def publish(self, user):
+    def publish(self, user, host=None):
          """
              Publishes a book on behalf of a (local) user.
          """
+        import json
+        import os
+        from django.conf import settings
          self.assert_publishable()
-        changes = self.get_current_changes(publishable=True)
-        book_xml = self.materialize(changes=changes)
-        apiclient.api_call(user, "books/", {"book_xml": book_xml})
+        changes = self.get_current_changes()
+        data = {"lesson_xml": self.wl1_xml(changes=changes)}
+        if host:
+            gallery_url = u'%s%s%s%s/' % (host, settings.MEDIA_URL, settings.IMAGE_DIR, self.slug)
+            gallery_dir = os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.slug)
+            if os.path.isdir(gallery_dir):
+                data['gallery_url'] = gallery_url
+                data['attachments'] = json.dumps(os.listdir(gallery_dir))
+        apiclient.api_call(user, "lessons/", data)
          # record the publish
          br = BookPublishRecord.objects.create(book=self, user=user)
          for c in changes:
              ChunkPublishRecord.objects.create(book_record=br, change=c)
          post_publish.send(sender=br)
+
+    def wl1_xml(self, publishable=True, changes=None):
+        """
+            Renders the book through the ``xslt/wl2to1.xslt`` stylesheet.
+
+            The source text is taken from ``self.materialize()``;
+            `publishable` and `changes` are passed to it unchanged.
+            Before the transformation the "Przebieg zajęć" section
+            (if present) is flattened and the root element gets
+            ``redslug`` and ``wlslug`` attributes set to the book's slug.
+
+            Returns the transformed document serialized with
+            UTF-8 encoding.
+
+            Raises librarian.ParseError when:
+             * there is no "Przebieg zajęć" section and the first dc:type
+               is 'course' or 'synthetic',
+             * an ``aktywnosc/opis`` element in the result is empty,
+             * the first child of ``aktywnosc/opis`` does not start
+               with a "<number>. " prefix.
+        """
+        from lxml import etree
+        import re
+        from StringIO import StringIO
+        from urllib import unquote
+        import os.path
+        from django.conf import settings
+        from fnpdjango.utils.text.slughifi import slughifi
+        from librarian import ParseError, DCNS
+
+        def _register_function(f):
+            """ Register extension function with lxml """
+            ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+            ns[f.__name__] = f
+            return f
+
+        @_register_function
+        def slugify(context, text):
+            """ Slugify the text (a node-set result is joined first) """
+            if isinstance(text, list):
+                text = ''.join(text)
+            return slughifi(text)
+
+        @_register_function
+        def rmext(context, text):
+            """ URL-unquote the text and drop a known file extension """
+            if isinstance(text, list):
+                text = ''.join(text)
+            text = unquote(text)
+            if '.' in text:
+                name, ext = text.rsplit('.', 1)
+                if ext.lower() in ('doc', 'docx', 'odt', 'pdf', 'jpg', 'jpeg'):
+                    text = name
+            return text
+
+        t = etree.parse(os.path.join(settings.PROJECT_ROOT, 'xslt/wl2to1.xslt'))
+        ft = self.materialize(publishable=publishable, changes=changes)
+        # The entity is replaced with a plain space before parsing.
+        ft = ft.replace('&nbsp;', ' ')
+        i1 = etree.parse(StringIO(ft))
+
+        for sect in i1.findall('//section'):
+            # An empty section has no heading child to inspect.
+            if len(sect) == 0:
+                continue
+            heading = sect[0].text
+            if heading and heading.strip() == u'Przebieg zajęć':
+                # Flatten: move every section nested below the first
+                # subsection directly under this section.
+                first = sect.find('section')
+                if first is not None:
+                    for sub in first.findall('.//section'):
+                        sect.append(sub)
+                break
+        else:
+            # No "Przebieg zajęć" section was found at all.
+            dc_type = i1.findall('//dc:type', namespaces={'dc': DCNS.uri})
+            # Compare the element's text; an Element never equals a string.
+            if dc_type and (dc_type[0].text or '').strip() in ('course', 'synthetic'):
+                raise ParseError('Brak przebiegu')
+
+        root = i1.getroot()
+        root.attrib['redslug'] = self.slug
+        # NOTE(review): wlslug is set to the same value as redslug;
+        # the original marked this line with "THIS!" - confirm intent.
+        root.attrib['wlslug'] = self.slug
+        w1t = i1.xslt(t)
+        for h in w1t.findall('//aktywnosc/opis'):
+            if len(h) == 0:
+                raise ParseError('Pusty element aktywnosc/opis')
+            # Accept a numeric prefix of any length ("1. ", "10. ", ...)
+            # and strip exactly what was matched.
+            prefix = re.match(r'\d+\.\s', h[0].text or '')
+            if not prefix:
+                raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text))
+            h[0].text = h[0].text[prefix.end():]
+        return etree.tostring(w1t, encoding='utf-8')