apps/catalogue/models/book.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 from django.contrib.sites.models import Site
   7 from django.db import models, transaction
   8 from django.template.loader import render_to_string
   9 from django.utils.translation import ugettext_lazy as _
  10 from slughifi import slughifi
  11
  12 import apiclient
  13 from catalogue.helpers import cached_in_field, GalleryMerger
  14 from catalogue.models import BookPublishRecord, ChunkPublishRecord, Project
  15 from catalogue.signals import post_publish
  16 from catalogue.tasks import refresh_instance, book_content_updated
  17 from catalogue.xml_tools import compile_text, split_xml
  18 from cover.models import Image
  19
  20
class Book(models.Model):
    """A document edited on the wiki.

    The book's text lives in its chunks (``chunk_set``); iterating over a
    book, indexing it and taking its ``len()`` all operate on those chunks.
    """

    # Basic metadata.
    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Gallery name; the verbose name is Polish for "materials".
    gallery = models.CharField(u'materiały', max_length=255, blank=True)
    project = models.ForeignKey(Project, null=True, blank=True)

    # wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False)
    # Optional link to a parent book (reverse accessor: ``children``).
    parent = models.ForeignKey(
        'self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    for_cybernauts = models.BooleanField(_('for Cybernauts'), default=False)

    # Cache: non-editable columns holding derived values. The underscore
    # fields are rewritten by touch(); the dc_* fields by refresh_dc_cache().
    _short_html = models.TextField(null=True, blank=True, editable=False)
    _single = models.NullBooleanField(editable=False, db_index=True)
    _new_publishable = models.NullBooleanField(editable=False)
    _published = models.NullBooleanField(editable=False)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    dc_cover_image = models.ForeignKey(
        Image, blank=True, null=True, db_index=True, on_delete=models.SET_NULL, editable=False)
    dc_slug = models.CharField(max_length=128, null=True, blank=True, editable=False, db_index=True)
  46
  47     class NoTextError(BaseException):
  48         pass
  49
    class Meta:
        # Model options: the model belongs to the 'catalogue' app and is
        # ordered by title, then by slug.
        app_label = 'catalogue'
        ordering = ['title', 'slug']
        # Display names (Polish for "module" / "modules").
        verbose_name = u'moduł'
        verbose_name_plural = u'moduły'
  55
  56     # Representing
  57     # ============
  58
  59     def __iter__(self):
  60         return iter(self.chunk_set.all())
  61
  62     def __getitem__(self, chunk):
  63         return self.chunk_set.all()[chunk]
  64
  65     def __len__(self):
  66         return self.chunk_set.count()
  67
    def __nonzero__(self):
        """Always evaluate a book as true.

        Necessary so that __len__ isn't used for bool evaluation; otherwise
        a book whose chunk count is zero would be falsy.
        """
        return True
  73
    def __unicode__(self):
        """Represent the book by its title."""
        return self.title
  76
  77     @models.permalink
  78     def get_absolute_url(self):
  79         return "catalogue_book", [self.slug]
  80
  81     def correct_about(self):
  82         return "http://%s%s" % (
  83             Site.objects.get_current().domain,
  84             self.get_absolute_url()
  85         )
  86
  87     # Creating & manipulating
  88     # =======================
  89
  90     def accessible(self, request):
  91         return self.public or request.user.is_authenticated()
  92
  93     @classmethod
  94     @transaction.commit_on_success
  95     def create(cls, creator, text, **kwargs):
  96         b = cls.objects.create(**kwargs)
  97         b.chunk_set.all().update(creator=creator)
  98         b[0].commit(text, author=creator)
  99         return b
 100
 101     def add(self, *args, **kwargs):
 102         """Add a new chunk at the end."""
 103         return self.chunk_set.reverse()[0].split(*args, **kwargs)
 104
 105     @classmethod
 106     @transaction.commit_on_success
 107     def import_xml_text(cls, text=u'', previous_book=None, commit_args=None, **kwargs):
 108         """Imports a book from XML, splitting it into chunks as necessary."""
 109         texts = split_xml(text)
 110         if previous_book:
 111             instance = previous_book
 112         else:
 113             instance = cls(**kwargs)
 114             instance.save()
 115
 116         # if there are more parts, set the rest to empty strings
 117         book_len = len(instance)
 118         for i in range(book_len - len(texts)):
 119             texts.append((u'pusta część %d' % (i + 1), u''))
 120
 121         for i, (title, text) in enumerate(texts):
 122             if not title:
 123                 title = u'część %d' % (i + 1)
 124
 125             slug = slughifi(title)
 126
 127             if i < book_len:
 128                 chunk = instance[i]
 129                 chunk.slug = slug[:50]
 130                 chunk.title = title[:255]
 131                 chunk.save()
 132             else:
 133                 chunk = instance.add(slug, title)
 134
 135             chunk.commit(text, **commit_args)
 136
 137         return instance
 138
 139     def make_chunk_slug(self, proposed):
 140         """
 141             Finds a chunk slug not yet used in the book.
 142         """
 143         slugs = set(c.slug for c in self)
 144         i = 1
 145         new_slug = proposed[:50]
 146         while new_slug in slugs:
 147             new_slug = "%s_%d" % (proposed[:45], i)
 148             i += 1
 149         return new_slug
 150
 151     @transaction.commit_on_success
 152     def append(self, other, slugs=None, titles=None):
 153         """Add all chunks of another book to self."""
 154         assert self != other
 155
 156         number = self[len(self) - 1].number + 1
 157         len_other = len(other)
 158         single = len_other == 1
 159
 160         if slugs is not None:
 161             assert len(slugs) == len_other
 162         if titles is not None:
 163             assert len(titles) == len_other
 164             if slugs is None:
 165                 slugs = [slughifi(t) for t in titles]
 166
 167         for i, chunk in enumerate(other):
 168             # move chunk to new book
 169             chunk.book = self
 170             chunk.number = number
 171
 172             if titles is None:
 173                 # try some title guessing
 174                 if other.title.startswith(self.title):
 175                     other_title_part = other.title[len(self.title):].lstrip(' /')
 176                 else:
 177                     other_title_part = other.title
 178
 179                 if single:
 180                     # special treatment for appending one-parters:
 181                     # just use the guessed title and original book slug
 182                     chunk.title = other_title_part
 183                     if other.slug.startswith(self.slug):
 184                         chunk.slug = other.slug[len(self.slug):].lstrip('-_')
 185                     else:
 186                         chunk.slug = other.slug
 187                 else:
 188                     chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
 189             else:
 190                 chunk.slug = slugs[i]
 191                 chunk.title = titles[i]
 192
 193             chunk.slug = self.make_chunk_slug(chunk.slug)
 194             chunk.save()
 195             number += 1
 196         assert not other.chunk_set.exists()
 197
 198         gm = GalleryMerger(self.gallery, other.gallery)
 199         self.gallery = gm.merge()
 200
 201         # and move the gallery starts
 202         if gm.was_merged:
 203             for chunk in self[len(self) - len_other:]:
 204                 old_start = chunk.gallery_start or 1
 205                 chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
 206                 chunk.save()
 207
 208         other.delete()
 209
 210     @transaction.commit_on_success
 211     def prepend_history(self, other):
 212         """Prepend history from all the other book's chunks to own."""
 213         assert self != other
 214
 215         for i in range(len(self), len(other)):
 216             title = u"pusta część %d" % i
 217             chunk = self.add(slughifi(title), title)
 218             chunk.commit('')
 219
 220         for i in range(len(other)):
 221             self[i].prepend_history(other[0])
 222
 223         assert not other.chunk_set.exists()
 224         other.delete()
 225
 226     def split(self):
 227         """Splits all the chunks into separate books."""
 228         for chunk in self:
 229             book = Book.objects.create(title=chunk.title, slug=chunk.slug, public=self.public, gallery=self.gallery)
 230             book[0].delete()
 231             chunk.book = book
 232             chunk.number = 1
 233             chunk.save()
 234         assert not self.chunk_set.exists()
 235         self.delete()
 236
 237     # State & cache
 238     # =============
 239
 240     def last_published(self):
 241         try:
 242             return self.publish_log.all()[0].timestamp
 243         except IndexError:
 244             return None
 245
 246     def assert_publishable(self):
 247         assert self.chunk_set.exists(), _('No chunks in the book.')
 248         try:
 249             changes = self.get_current_changes()
 250         except self.NoTextError:
 251             raise AssertionError(_('Not all chunks have publishable revisions.'))
 252
 253         from librarian import NoDublinCore, ParseError, ValidationError
 254
 255         try:
 256             bi = self.wldocument(changes=changes, strict=True).book_info
 257             if not bi.audience:
 258                 raise ValidationError('No audience specified')
 259             if not bi.type:
 260                 raise ValidationError('No type specified')
 261         except ParseError, e:
 262             raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
 263         except NoDublinCore:
 264             raise AssertionError(_('No Dublin Core found.'))
 265         except ValidationError, e:
 266             raise AssertionError(_('Invalid Dublin Core') + ': ' + unicode(e))
 267
 268         valid_about = self.correct_about()
 269         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
 270
 271     def publishable_error(self):
 272         try:
 273             return self.assert_publishable()
 274         except AssertionError, e:
 275             return e
 276
 277     def hidden(self):
 278         return self.slug.startswith('.')
 279
 280     def is_new_publishable(self):
 281         """Checks if book is ready for publishing.
 282
 283         Returns True if there is a publishable version newer than the one
 284         already published.
 285
 286         """
 287         new_publishable = False
 288         if not self.chunk_set.exists():
 289             return False
 290         for chunk in self:
 291             change = chunk.publishable()
 292             if not change:
 293                 return False
 294             if not new_publishable and not change.publish_log.exists():
 295                 new_publishable = True
 296         return new_publishable
 297     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
 298
 299     def is_published(self):
 300         return self.publish_log.exists()
 301     published = cached_in_field('_published')(is_published)
 302
 303     def get_on_track(self):
 304         if self.published:
 305             return -1
 306         stages = [ch.stage.ordering if ch.stage is not None else 0 for ch in self]
 307         if not len(stages):
 308             return 0
 309         return min(stages)
 310     on_track = cached_in_field('_on_track')(get_on_track)
 311
 312     def is_single(self):
 313         return len(self) == 1
 314     single = cached_in_field('_single')(is_single)
 315
 316     @cached_in_field('_short_html')
 317     def short_html(self):
 318         return render_to_string('catalogue/book_list/book.html', {'book': self})
 319
 320     def book_info(self, publishable=True):
 321         try:
 322             book_xml = self.wl1_xml(publishable=publishable)
 323         except self.NoTextError:
 324             pass
 325         else:
 326             from librarian.dcparser import BookInfo
 327             from librarian import NoDublinCore, ParseError, ValidationError
 328             try:
 329                 return BookInfo.from_string(book_xml)
 330             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
 331                 return None
 332
 333     def refresh_dc_cache(self):
 334         update = {
 335             'dc_slug': None,
 336             'dc_cover_image': None,
 337         }
 338
 339         info = self.book_info()
 340         if info is not None:
 341             update['dc_slug'] = info.url.slug
 342             if info.cover_source:
 343                 try:
 344                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
 345                 except Image.DoesNotExist:
 346                     pass
 347                 else:
 348                     if info.cover_source == image.get_full_url():
 349                         update['dc_cover_image'] = image
 350         Book.objects.filter(pk=self.pk).update(**update)
 351
 352     def touch(self):
 353         # this should only really be done when text or publishable status changes
 354         book_content_updated.delay(self)
 355
 356         update = {
 357             "_new_publishable": self.is_new_publishable(),
 358             "_published": self.is_published(),
 359             "_single": self.is_single(),
 360             "_on_track": self.get_on_track(),
 361             "_short_html": None,
 362         }
 363         Book.objects.filter(pk=self.pk).update(**update)
 364         refresh_instance(self)
 365
 366     def refresh(self):
 367         """This should be done offline."""
 368         self.short_html
 369         self.single
 370         self.new_publishable
 371         self.published
 372
 373     # Materializing & publishing
 374     # ==========================
 375
 376     def get_current_changes(self, publishable=True):
 377         """
 378             Returns a list containing one Change for every Chunk in the Book.
 379             Takes the most recent revision (publishable, if set).
 380             Throws an error, if a proper revision is unavailable for a Chunk.
 381         """
 382         if publishable:
 383             changes = [chunk.publishable() for chunk in self]
 384         else:
 385             changes = [chunk.head for chunk in self if chunk.head is not None]
 386         if None in changes:
 387             raise self.NoTextError('Some chunks have no available text.')
 388         return changes
 389
 390     def materialize(self, publishable=False, changes=None):
 391         """
 392             Get full text of the document compiled from chunks.
 393             Takes the current versions of all texts
 394             or versions most recently tagged for publishing,
 395             or a specified iterable changes.
 396         """
 397         if changes is None:
 398             changes = self.get_current_changes(publishable)
 399         return compile_text(change.materialize() for change in changes)
 400
 401     def wldocument(self, publishable=True, changes=None, parse_dublincore=True, strict=False):
 402         from catalogue.ebook_utils import RedakcjaDocProvider
 403         from librarian.parser import WLDocument
 404
 405         return WLDocument.from_string(
 406                 self.wl1_xml(publishable=publishable, changes=changes),
 407                 provider=RedakcjaDocProvider(publishable=publishable),
 408                 parse_dublincore=parse_dublincore,
 409                 strict=strict)
 410
 411     def publish(self, user, host=None):
 412         """
 413             Publishes a book on behalf of a (local) user.
 414         """
 415         import json
 416         import os
 417         from django.conf import settings
 418         self.assert_publishable()
 419         changes = self.get_current_changes()
 420         data = {"lesson_xml": self.wl1_xml(changes=changes)}
 421         if host:
 422             gallery_url = u'%s%s%s%s/' % (host, settings.MEDIA_URL, settings.IMAGE_DIR, self.slug)
 423             gallery_dir = os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.slug)
 424             if os.path.isdir(gallery_dir):
 425                 data['gallery_url'] = gallery_url
 426                 data['attachments'] = json.dumps(os.listdir(gallery_dir))
 427         apiclient.api_call(user, "lessons/", data)
 428         # record the publish
 429         br = BookPublishRecord.objects.create(book=self, user=user)
 430         for c in changes:
 431             ChunkPublishRecord.objects.create(book_record=br, change=c)
 432         post_publish.send(sender=br)
 433
 434     def wl1_xml(self, publishable=True, changes=None):
 435         from lxml import etree
 436         import re
 437         from StringIO import StringIO
 438         from urllib import unquote
 439         import os.path
 440         from django.conf import settings
 441         from fnpdjango.utils.text.slughifi import slughifi
 442         from librarian import ParseError, DCNS
 443
 444         def _register_function(f):
 445             """ Register extension function with lxml """
 446             ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
 447             ns[f.__name__] = f
 448             return f
 449
 450         @_register_function
 451         def slugify(context, text):
 452             """Remove unneeded whitespace from beginning and end"""
 453             if isinstance(text, list):
 454                 text = ''.join(text)
 455             return slughifi(text)
 456
 457         @_register_function
 458         def rmext(context, text):
 459             if isinstance(text, list):
 460                 text = ''.join(text)
 461             text = unquote(text)
 462             if '.' in text:
 463                 name, ext = text.rsplit('.', 1)
 464                 if ext.lower() in ('doc', 'docx', 'odt', 'pdf', 'jpg', 'jpeg'):
 465                     text = name
 466             return text
 467
 468         t = etree.parse(os.path.join(settings.PROJECT_ROOT, 'xslt/wl2to1.xslt'))
 469         ft = self.materialize(publishable=publishable, changes=changes)
 470         ft = ft.replace('&nbsp;', ' ')
 471         f2 = StringIO(ft)
 472         i1 = etree.parse(f2)
 473
 474         for sect in i1.findall('//section'):
 475             if sect[0].text and sect[0].text.strip() == u'Przebieg zajęć':
 476                 # Prostujemy.
 477                 first = sect.find('section')
 478                 subs = first.findall('.//section')
 479                 for sub in subs:
 480                     sect.append(sub)
 481                 break
 482         else:
 483             # print 'BRAK PRZEBIEGU'
 484             dc_type = i1.findall('//dc:type', namespaces={'dc': DCNS.uri})
 485             if dc_type and dc_type[0] in ('course', 'synthetic'):
 486                 raise ParseError('Brak przebiegu')
 487
 488         i1.getroot().attrib['redslug'] = self.slug
 489         i1.getroot().attrib['wlslug'] = self.slug  # THIS!
 490         # print '.',
 491         w1t = i1.xslt(t)
 492         for h in w1t.findall('//aktywnosc/opis'):
 493             if len(h) == 0:
 494                 raise ParseError('Pusty element aktywnosc/opis')
 495             # FIXME assumption that every lesson has at most 9 parts
 496             if not h[0].text or not re.match(r'\d\.\s', h[0].text):
 497                 raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text))
 498             h[0].text = h[0].text[3:]
 499         return etree.tostring(w1t, encoding='utf-8')