librarian update

[redakcja.git] / apps / catalogue / models / book.py
diff --git a/apps/catalogue/models/book.py b/apps/catalogue/models/book.py

index ca38998..4c35e68 100755 (executable)
--- a/apps/catalogue/models/book.py
+++ b/apps/catalogue/models/book.py
@@ -7,20 +7,16 @@ from django.contrib.sites.models import Site
  from django.db import models, transaction
  from django.template.loader import render_to_string
  from django.utils.translation import ugettext_lazy as _
  from django.db import models, transaction
  from django.template.loader import render_to_string
  from django.utils.translation import ugettext_lazy as _
-from django.conf import settings
  from slughifi import slughifi
  
  from slughifi import slughifi
  
-
  import apiclient
  from catalogue.helpers import cached_in_field, GalleryMerger
  import apiclient
  from catalogue.helpers import cached_in_field, GalleryMerger
-from catalogue.models import BookPublishRecord, ChunkPublishRecord
+from catalogue.models import BookPublishRecord, ChunkPublishRecord, Project
  from catalogue.signals import post_publish
  from catalogue.tasks import refresh_instance, book_content_updated
  from catalogue.xml_tools import compile_text, split_xml
  from cover.models import Image
  from catalogue.signals import post_publish
  from catalogue.tasks import refresh_instance, book_content_updated
  from catalogue.xml_tools import compile_text, split_xml
  from cover.models import Image
-import os
-import shutil
-import re
+
  
  class Book(models.Model):
      """ A document edited on the wiki """
  
  class Book(models.Model):
      """ A document edited on the wiki """
@@ -28,21 +24,25 @@ class Book(models.Model):
      title = models.CharField(_('title'), max_length=255, db_index=True)
      slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
      public = models.BooleanField(_('public'), default=True, db_index=True)
      title = models.CharField(_('title'), max_length=255, db_index=True)
      slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
      public = models.BooleanField(_('public'), default=True, db_index=True)
-    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
+    gallery = models.CharField(u'materiały', max_length=255, blank=True)
+    project = models.ForeignKey(Project, null=True, blank=True)
  
  
-    #wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False)
-    parent = models.ForeignKey('self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
+    # wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False)
+    parent = models.ForeignKey(
+        'self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
      parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)
  
      parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)
  
+    for_cybernauts = models.BooleanField(_('for Cybernauts'), default=False)
+
      # Cache
      _short_html = models.TextField(null=True, blank=True, editable=False)
      _single = models.NullBooleanField(editable=False, db_index=True)
      _new_publishable = models.NullBooleanField(editable=False)
      _published = models.NullBooleanField(editable=False)
      _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
      # Cache
      _short_html = models.TextField(null=True, blank=True, editable=False)
      _single = models.NullBooleanField(editable=False, db_index=True)
      _new_publishable = models.NullBooleanField(editable=False)
      _published = models.NullBooleanField(editable=False)
      _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
-    dc_cover_image = models.ForeignKey(Image, blank=True, null=True, db_index=True, on_delete=models.SET_NULL)
-    dc_slug = models.CharField(max_length=128, null=True, blank=True,
-            editable=False, db_index=True)
+    dc_cover_image = models.ForeignKey(
+        Image, blank=True, null=True, db_index=True, on_delete=models.SET_NULL, editable=False)
+    dc_slug = models.CharField(max_length=128, null=True, blank=True, editable=False, db_index=True)
  
      class NoTextError(BaseException):
          pass
  
      class NoTextError(BaseException):
          pass
@@ -50,9 +50,8 @@ class Book(models.Model):
      class Meta:
          app_label = 'catalogue'
          ordering = ['title', 'slug']
      class Meta:
          app_label = 'catalogue'
          ordering = ['title', 'slug']
-        verbose_name = _('book')
-        verbose_name_plural = _('books')
-
+        verbose_name = u'moduł'
+        verbose_name_plural = u'moduły'
  
      # Representing
      # ============
  
      # Representing
      # ============
@@ -77,7 +76,7 @@ class Book(models.Model):
  
      @models.permalink
      def get_absolute_url(self):
  
      @models.permalink
      def get_absolute_url(self):
-        return ("catalogue_book", [self.slug])
+        return "catalogue_book", [self.slug]
  
      def correct_about(self):
          return "http://%s%s" % (
  
      def correct_about(self):
          return "http://%s%s" % (
@@ -93,8 +92,8 @@ class Book(models.Model):
  
      @classmethod
      @transaction.commit_on_success
  
      @classmethod
      @transaction.commit_on_success
-    def create(cls, creator, text, *args, **kwargs):
-        b = cls.objects.create(*args, **kwargs)
+    def create(cls, creator, text, **kwargs):
+        b = cls.objects.create(**kwargs)
          b.chunk_set.all().update(creator=creator)
          b[0].commit(text, author=creator)
          return b
          b.chunk_set.all().update(creator=creator)
          b[0].commit(text, author=creator)
          return b
@@ -105,8 +104,7 @@ class Book(models.Model):
  
      @classmethod
      @transaction.commit_on_success
  
      @classmethod
      @transaction.commit_on_success
-    def import_xml_text(cls, text=u'', previous_book=None,
-                commit_args=None, **kwargs):
+    def import_xml_text(cls, text=u'', previous_book=None, commit_args=None, **kwargs):
          """Imports a book from XML, splitting it into chunks as necessary."""
          texts = split_xml(text)
          if previous_book:
          """Imports a book from XML, splitting it into chunks as necessary."""
          texts = split_xml(text)
          if previous_book:
@@ -120,7 +118,6 @@ class Book(models.Model):
          for i in range(book_len - len(texts)):
              texts.append((u'pusta część %d' % (i + 1), u''))
  
          for i in range(book_len - len(texts)):
              texts.append((u'pusta część %d' % (i + 1), u''))
  
-        i = 0
          for i, (title, text) in enumerate(texts):
              if not title:
                  title = u'część %d' % (i + 1)
          for i, (title, text) in enumerate(texts):
              if not title:
                  title = u'część %d' % (i + 1)
@@ -140,7 +137,7 @@ class Book(models.Model):
          return instance
  
      def make_chunk_slug(self, proposed):
          return instance
  
      def make_chunk_slug(self, proposed):
-        """ 
+        """
              Finds a chunk slug not yet used in the book.
          """
          slugs = set(c.slug for c in self)
              Finds a chunk slug not yet used in the book.
          """
          slugs = set(c.slug for c in self)
@@ -203,13 +200,13 @@ class Book(models.Model):
  
          # and move the gallery starts
          if gm.was_merged:
  
          # and move the gallery starts
          if gm.was_merged:
-                for chunk in self[len(self) - len_other:]:
-                        chunk.gallery_start += gm.dest_size - gm.num_deleted
-                        chunk.save()
+            for chunk in self[len(self) - len_other:]:
+                old_start = chunk.gallery_start or 1
+                chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
+                chunk.save()
  
          other.delete()
  
  
          other.delete()
  
-
      @transaction.commit_on_success
      def prepend_history(self, other):
          """Prepend history from all the other book's chunks to own."""
      @transaction.commit_on_success
      def prepend_history(self, other):
          """Prepend history from all the other book's chunks to own."""
@@ -228,10 +225,8 @@ class Book(models.Model):
  
      def split(self):
          """Splits all the chunks into separate books."""
  
      def split(self):
          """Splits all the chunks into separate books."""
-        self.title
          for chunk in self:
          for chunk in self:
-            book = Book.objects.create(title=chunk.title, slug=chunk.slug,
-                    public=self.public, gallery=self.gallery)
+            book = Book.objects.create(title=chunk.title, slug=chunk.slug, public=self.public, gallery=self.gallery)
              book[0].delete()
              chunk.book = book
              chunk.number = 1
              book[0].delete()
              chunk.book = book
              chunk.number = 1
@@ -251,16 +246,18 @@ class Book(models.Model):
      def assert_publishable(self):
          assert self.chunk_set.exists(), _('No chunks in the book.')
          try:
      def assert_publishable(self):
          assert self.chunk_set.exists(), _('No chunks in the book.')
          try:
-            changes = self.get_current_changes(publishable=True)
+            changes = self.get_current_changes()
          except self.NoTextError:
              raise AssertionError(_('Not all chunks have publishable revisions.'))
          except self.NoTextError:
              raise AssertionError(_('Not all chunks have publishable revisions.'))
-        book_xml = self.materialize(changes=changes)
  
  
-        from librarian.dcparser import BookInfo
          from librarian import NoDublinCore, ParseError, ValidationError
  
          try:
          from librarian import NoDublinCore, ParseError, ValidationError
  
          try:
-            bi = BookInfo.from_string(book_xml.encode('utf-8'), strict=True)
+            bi = self.wldocument(changes=changes, strict=True).book_info
+            if not bi.audience:
+                raise ValidationError('No audience specified')
+            if not bi.type:
+                raise ValidationError('No type specified')
          except ParseError, e:
              raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
          except NoDublinCore:
          except ParseError, e:
              raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
          except NoDublinCore:
@@ -271,6 +268,12 @@ class Book(models.Model):
          valid_about = self.correct_about()
          assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
  
          valid_about = self.correct_about()
          assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
  
+    def publishable_error(self):
+        try:
+            return self.assert_publishable()
+        except AssertionError, e:
+            return e
+
      def hidden(self):
          return self.slug.startswith('.')
  
      def hidden(self):
          return self.slug.startswith('.')
  
@@ -300,8 +303,7 @@ class Book(models.Model):
      def get_on_track(self):
          if self.published:
              return -1
      def get_on_track(self):
          if self.published:
              return -1
-        stages = [ch.stage.ordering if ch.stage is not None else 0
-                    for ch in self]
+        stages = [ch.stage.ordering if ch.stage is not None else 0 for ch in self]
          if not len(stages):
              return 0
          return min(stages)
          if not len(stages):
              return 0
          return min(stages)
@@ -317,14 +319,14 @@ class Book(models.Model):
  
      def book_info(self, publishable=True):
          try:
  
      def book_info(self, publishable=True):
          try:
-            book_xml = self.materialize(publishable=publishable)
+            book_xml = self.wl1_xml(publishable=publishable)
          except self.NoTextError:
              pass
          else:
              from librarian.dcparser import BookInfo
              from librarian import NoDublinCore, ParseError, ValidationError
              try:
          except self.NoTextError:
              pass
          else:
              from librarian.dcparser import BookInfo
              from librarian import NoDublinCore, ParseError, ValidationError
              try:
-                return BookInfo.from_string(book_xml.encode('utf-8'))
+                return BookInfo.from_string(book_xml)
              except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
                  return None
  
              except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
                  return None
  
@@ -340,7 +342,7 @@ class Book(models.Model):
              if info.cover_source:
                  try:
                      image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
              if info.cover_source:
                  try:
                      image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
-                except:
+                except Image.DoesNotExist:
                      pass
                  else:
                      if info.cover_source == image.get_full_url():
                      pass
                  else:
                      if info.cover_source == image.get_full_url():
@@ -386,7 +388,7 @@ class Book(models.Model):
          return changes
  
      def materialize(self, publishable=False, changes=None):
          return changes
  
      def materialize(self, publishable=False, changes=None):
-        """ 
+        """
              Get full text of the document compiled from chunks.
              Takes the current versions of all texts
              or versions most recently tagged for publishing,
              Get full text of the document compiled from chunks.
              Takes the current versions of all texts
              or versions most recently tagged for publishing,
@@ -396,25 +398,102 @@ class Book(models.Model):
              changes = self.get_current_changes(publishable)
          return compile_text(change.materialize() for change in changes)
  
              changes = self.get_current_changes(publishable)
          return compile_text(change.materialize() for change in changes)
  
-    def wldocument(self, publishable=True, changes=None, parse_dublincore=True):
+    def wldocument(self, publishable=True, changes=None, parse_dublincore=True, strict=False):
          from catalogue.ebook_utils import RedakcjaDocProvider
          from librarian.parser import WLDocument
  
          return WLDocument.from_string(
          from catalogue.ebook_utils import RedakcjaDocProvider
          from librarian.parser import WLDocument
  
          return WLDocument.from_string(
-                self.materialize(publishable=publishable, changes=changes),
+                self.wl1_xml(publishable=publishable, changes=changes),
                  provider=RedakcjaDocProvider(publishable=publishable),
                  provider=RedakcjaDocProvider(publishable=publishable),
-                parse_dublincore=parse_dublincore)
+                parse_dublincore=parse_dublincore,
+                strict=strict)
  
  
-    def publish(self, user):
+    def publish(self, user, host=None):
          """
              Publishes a book on behalf of a (local) user.
          """
          """
              Publishes a book on behalf of a (local) user.
          """
+        import json
+        import os
+        from django.conf import settings
          self.assert_publishable()
          self.assert_publishable()
-        changes = self.get_current_changes(publishable=True)
-        book_xml = self.materialize(changes=changes)
-        apiclient.api_call(user, "books/", {"book_xml": book_xml})
+        changes = self.get_current_changes()
+        data = {"lesson_xml": self.wl1_xml(changes=changes)}
+        if host:
+            gallery_url = u'%s%s%s%s/' % (host, settings.MEDIA_URL, settings.IMAGE_DIR, self.slug)
+            gallery_dir = os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.slug)
+            if os.path.isdir(gallery_dir):
+                data['gallery_url'] = gallery_url
+                data['attachments'] = json.dumps(os.listdir(gallery_dir))
+        apiclient.api_call(user, "lessons/", data)
          # record the publish
          br = BookPublishRecord.objects.create(book=self, user=user)
          for c in changes:
              ChunkPublishRecord.objects.create(book_record=br, change=c)
          post_publish.send(sender=br)
          # record the publish
          br = BookPublishRecord.objects.create(book=self, user=user)
          for c in changes:
              ChunkPublishRecord.objects.create(book_record=br, change=c)
          post_publish.send(sender=br)
+
+    def wl1_xml(self, publishable=True, changes=None):
+        from lxml import etree
+        import re
+        from StringIO import StringIO
+        from urllib import unquote
+        import os.path
+        from django.conf import settings
+        from fnpdjango.utils.text.slughifi import slughifi
+        from librarian import ParseError, DCNS
+
+        def _register_function(f):
+            """ Register extension function with lxml """
+            ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
+            ns[f.__name__] = f
+            return f
+
+        @_register_function
+        def slugify(context, text):
+            """Remove unneeded whitespace from beginning and end"""
+            if isinstance(text, list):
+                text = ''.join(text)
+            return slughifi(text)
+
+        @_register_function
+        def rmext(context, text):
+            if isinstance(text, list):
+                text = ''.join(text)
+            text = unquote(text)
+            if '.' in text:
+                name, ext = text.rsplit('.', 1)
+                if ext.lower() in ('doc', 'docx', 'odt', 'pdf', 'jpg', 'jpeg'):
+                    text = name
+            return text
+
+        t = etree.parse(os.path.join(settings.PROJECT_ROOT, 'xslt/wl2to1.xslt'))
+        ft = self.materialize(publishable=publishable, changes=changes)
+        ft = ft.replace('&nbsp;', ' ')
+        f2 = StringIO(ft)
+        i1 = etree.parse(f2)
+
+        for sect in i1.findall('//section'):
+            if sect[0].text and sect[0].text.strip() == u'Przebieg zajęć':
+                # Prostujemy.
+                first = sect.find('section')
+                subs = first.findall('.//section')
+                for sub in subs:
+                    sect.append(sub)
+                break
+        else:
+            # print 'BRAK PRZEBIEGU'
+            dc_type = i1.findall('//dc:type', namespaces={'dc': DCNS.uri})
+            if dc_type and dc_type[0] in ('course', 'synthetic'):
+                raise ParseError('Brak przebiegu')
+
+        i1.getroot().attrib['redslug'] = self.slug
+        i1.getroot().attrib['wlslug'] = self.slug  # THIS!
+        # print '.',
+        w1t = i1.xslt(t)
+        for h in w1t.findall('//aktywnosc/opis'):
+            if len(h) == 0:
+                raise ParseError('Pusty element aktywnosc/opis')
+            # FIXME assumption that every lesson has at most 9 parts
+            if not h[0].text or not re.match(r'\d\.\s', h[0].text):
+                raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text))
+            h[0].text = h[0].text[3:]
+        return etree.tostring(w1t, encoding='utf-8')