src/documents/models/book.py

   1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from django.apps import apps
   5 from django.contrib.sites.models import Site
   6 from django.db import models, transaction
   7 from django.template.loader import render_to_string
   8 from django.urls import reverse
   9 from django.utils.translation import ugettext_lazy as _
  10 from django.conf import settings
  11 from slugify import slugify
  12
  13
  14 import apiclient
  15 from documents.helpers import cached_in_field, GalleryMerger
  16 from documents.models import BookPublishRecord, ChunkPublishRecord, Project
  17 from documents.signals import post_publish
  18 from documents.xml_tools import compile_text, split_xml
  19 from cover.models import Image
  20 import os
  21 import shutil
  22 import re
  23
class Book(models.Model):
    """A document edited on the wiki.

    A book is made of chunks reachable through ``chunk_set``; iterating,
    indexing and ``len()`` on a book all operate on that chunk set.
    """

    title = models.CharField(_('title'), max_length=255, db_index=True)
    # Unique identifier used in URLs (see get_absolute_url).
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    # When False, only authenticated users can see the book
    # (see get_visible_for and accessible).
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Gallery directory name, joined with MEDIA_ROOT/IMAGE_DIR by gallery_path().
    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
    project = models.ForeignKey(Project, models.SET_NULL, null=True, blank=True)

    # NOTE(review): parent / parent_number are not read or written by any
    # method visible in this class -- confirm where they are maintained.
    parent = models.ForeignKey('self', models.SET_NULL, null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # Denormalised values: the four underscore-prefixed columns are rewritten
    # by touch() and exposed through the cached_in_field() wrappers defined
    # next to the corresponding is_*/get_* methods.
    _single = models.BooleanField(editable=False, null=True, db_index=True)
    _new_publishable = models.BooleanField(editable=False, null=True)
    _published = models.BooleanField(editable=False, null=True)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # Cover image and catalogue link are derived from the book's Dublin Core
    # metadata by refresh_dc_cache().
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    # Reference to catalogue.Book by slug; db_constraint=False means no
    # foreign-key constraint is created in the database.
    catalogue_book = models.ForeignKey(
        'catalogue.Book',
        models.DO_NOTHING,
        to_field='slug',
        null=True, blank=True,
        db_constraint=False,
        editable=False, db_index=True,
        related_name='document_books',
        related_query_name='document_book',
    )
  53
  54     class NoTextError(BaseException):
  55         pass
  56
    class Meta:
        # Django model options for Book.
        app_label = 'documents'
        # Default ordering of querysets: by title, then by slug.
        ordering = ['title', 'slug']
        # Translatable names for the model.
        verbose_name = _('book')
        verbose_name_plural = _('books')
  62
  63     @classmethod
  64     def get_visible_for(cls, user):
  65         qs = cls.objects.all()
  66         if not user.is_authenticated:
  67             qs = qs.filter(public=True)
  68         return qs
  69
  70     # Representing
  71     # ============
  72
  73     def __iter__(self):
  74         return iter(self.chunk_set.all())
  75
  76     def __getitem__(self, chunk):
  77         return self.chunk_set.all()[chunk]
  78
  79     def __len__(self):
  80         return self.chunk_set.count()
  81
  82     def __bool__(self):
  83         """
  84             Necessary so that __len__ isn't used for bool evaluation.
  85         """
  86         return True
  87
  88     def __str__(self):
  89         return self.title
  90
  91     def get_absolute_url(self):
  92         return reverse("documents_book", args=[self.slug])
  93
  94     def correct_about(self):
  95         return "http://%s%s" % (
  96             Site.objects.get_current().domain,
  97             self.get_absolute_url()
  98         )
  99
 100     def gallery_path(self):
 101         return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.gallery)
 102
 103     def gallery_url(self):
 104         return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, self.gallery)
 105
 106     # Creating & manipulating
 107     # =======================
 108
 109     def accessible(self, request):
 110         return self.public or request.user.is_authenticated
 111
 112     @classmethod
 113     @transaction.atomic
 114     def create(cls, creator, text, *args, **kwargs):
 115         b = cls.objects.create(*args, **kwargs)
 116         b.chunk_set.all().update(creator=creator)
 117         b[0].commit(text, author=creator)
 118         return b
 119
 120     def add(self, *args, **kwargs):
 121         """Add a new chunk at the end."""
 122         return self.chunk_set.reverse()[0].split(*args, **kwargs)
 123
 124     @classmethod
 125     @transaction.atomic
 126     def import_xml_text(cls, text=u'', previous_book=None,
 127                 commit_args=None, **kwargs):
 128         """Imports a book from XML, splitting it into chunks as necessary."""
 129         texts = split_xml(text)
 130         if previous_book:
 131             instance = previous_book
 132         else:
 133             instance = cls(**kwargs)
 134             instance.save()
 135
 136         # if there are more parts, set the rest to empty strings
 137         book_len = len(instance)
 138         for i in range(book_len - len(texts)):
 139             texts.append((u'pusta część %d' % (i + 1), u''))
 140
 141         i = 0
 142         for i, (title, text) in enumerate(texts):
 143             if not title:
 144                 title = u'część %d' % (i + 1)
 145
 146             slug = slugify(title)
 147
 148             if i < book_len:
 149                 chunk = instance[i]
 150                 chunk.slug = slug[:50]
 151                 chunk.title = title[:255]
 152                 chunk.save()
 153             else:
 154                 chunk = instance.add(slug, title)
 155
 156             chunk.commit(text, **commit_args)
 157
 158         return instance
 159
 160     def make_chunk_slug(self, proposed):
 161         """
 162             Finds a chunk slug not yet used in the book.
 163         """
 164         slugs = set(c.slug for c in self)
 165         i = 1
 166         new_slug = proposed[:50]
 167         while new_slug in slugs:
 168             new_slug = "%s_%d" % (proposed[:45], i)
 169             i += 1
 170         return new_slug
 171
    @transaction.atomic
    def append(self, other, slugs=None, titles=None):
        """Add all chunks of another book to self.

        The chunks of ``other`` are moved to the end of this book and
        renumbered, the two galleries are merged through GalleryMerger,
        and ``other`` is deleted.

        Args:
            other: the book to absorb; must not be this book.
            slugs: optional list of new chunk slugs, one per chunk of
                ``other``.
            titles: optional list of new chunk titles, one per chunk of
                ``other``. When given without ``slugs``, the slugs are
                derived from the titles with ``slugify``.

        NOTE(review): ``slugs`` is only applied when ``titles`` is given;
        passed alone it is length-checked and then ignored.
        NOTE(review): preconditions are checked with ``assert``, which is
        skipped when Python runs with -O.
        """
        assert self != other

        # Numbering of the moved chunks continues after our last chunk.
        number = self[len(self) - 1].number + 1
        len_other = len(other)
        single = len_other == 1

        if slugs is not None:
            assert len(slugs) == len_other
        if titles is not None:
            assert len(titles) == len_other
            if slugs is None:
                slugs = [slugify(t) for t in titles]

        for i, chunk in enumerate(other):
            # move chunk to new book
            chunk.book = self
            chunk.number = number

            if titles is None:
                # try some title guessing: drop our own title from the
                # beginning of the other book's title, if it is there
                if other.title.startswith(self.title):
                    other_title_part = other.title[len(self.title):].lstrip(' /')
                else:
                    other_title_part = other.title

                if single:
                    # special treatment for appending one-parters:
                    # just use the guessed title and original book slug
                    chunk.title = other_title_part
                    if other.slug.startswith(self.slug):
                        chunk.slug = other.slug[len(self.slug):].lstrip('-_')
                    else:
                        chunk.slug = other.slug
                else:
                    # multi-part book: prefix each chunk title, keep its slug
                    chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
            else:
                chunk.slug = slugs[i]
                chunk.title = titles[i]

            # make sure the slug does not clash with chunks already in self
            chunk.slug = self.make_chunk_slug(chunk.slug)
            chunk.save()
            number += 1
        # every chunk should have been moved out of the other book by now
        assert not other.chunk_set.exists()

        gm = GalleryMerger(self.gallery, other.gallery)
        # NOTE(review): self.gallery is assigned here but self.save() is not
        # called in this method -- confirm the new value is persisted elsewhere.
        self.gallery = gm.merge()

        # and move the gallery starts
        if gm.was_merged:
                # the last len_other chunks of self are the ones just moved in
                for chunk in self[len(self) - len_other:]:
                        old_start = chunk.gallery_start or 1
                        chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
                        chunk.save()

        other.delete()
 230
 231
 232     @transaction.atomic
 233     def prepend_history(self, other):
 234         """Prepend history from all the other book's chunks to own."""
 235         assert self != other
 236
 237         for i in range(len(self), len(other)):
 238             title = u"pusta część %d" % i
 239             chunk = self.add(slugify(title), title)
 240             chunk.commit('')
 241
 242         for i in range(len(other)):
 243             self[i].prepend_history(other[0])
 244
 245         assert not other.chunk_set.exists()
 246         other.delete()
 247
 248     def split(self):
 249         """Splits all the chunks into separate books."""
 250         self.title
 251         for chunk in self:
 252             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
 253                     public=self.public, gallery=self.gallery)
 254             book[0].delete()
 255             chunk.book = book
 256             chunk.number = 1
 257             chunk.save()
 258         assert not self.chunk_set.exists()
 259         self.delete()
 260
 261     # State & cache
 262     # =============
 263
 264     def last_published(self):
 265         try:
 266             return self.publish_log.all()[0].timestamp
 267         except IndexError:
 268             return None
 269
 270     def assert_publishable(self):
 271         assert self.chunk_set.exists(), _('No chunks in the book.')
 272         try:
 273             changes = self.get_current_changes(publishable=True)
 274         except self.NoTextError:
 275             raise AssertionError(_('Not all chunks have publishable revisions.'))
 276
 277         from librarian import NoDublinCore, ParseError, ValidationError
 278
 279         try:
 280             bi = self.wldocument(changes=changes, strict=True).book_info
 281         except ParseError as e:
 282             raise AssertionError(_('Invalid XML') + ': ' + str(e))
 283         except NoDublinCore:
 284             raise AssertionError(_('No Dublin Core found.'))
 285         except ValidationError as e:
 286             raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e))
 287
 288         valid_about = self.correct_about()
 289         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
 290
 291     def publishable_error(self):
 292         try:
 293             return self.assert_publishable()
 294         except AssertionError as e:
 295             return e
 296         else:
 297             return None
 298
 299     def hidden(self):
 300         return self.slug.startswith('.')
 301
 302     def is_new_publishable(self):
 303         """Checks if book is ready for publishing.
 304
 305         Returns True if there is a publishable version newer than the one
 306         already published.
 307
 308         """
 309         new_publishable = False
 310         if not self.chunk_set.exists():
 311             return False
 312         for chunk in self:
 313             change = chunk.publishable()
 314             if not change:
 315                 return False
 316             if not new_publishable and not change.publish_log.exists():
 317                 new_publishable = True
 318         return new_publishable
 319     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
 320
 321     def is_published(self):
 322         return self.publish_log.exists()
 323     published = cached_in_field('_published')(is_published)
 324
 325     def get_on_track(self):
 326         if self.published:
 327             return -1
 328         stages = [ch.stage.ordering if ch.stage is not None else 0
 329                     for ch in self]
 330         if not len(stages):
 331             return 0
 332         return min(stages)
 333     on_track = cached_in_field('_on_track')(get_on_track)
 334
 335     def is_single(self):
 336         return len(self) == 1
 337     single = cached_in_field('_single')(is_single)
 338
 339     def book_info(self, publishable=True):
 340         try:
 341             book_xml = self.materialize(publishable=publishable)
 342         except self.NoTextError:
 343             pass
 344         else:
 345             from librarian.dcparser import BookInfo
 346             from librarian import NoDublinCore, ParseError, ValidationError
 347             try:
 348                 return BookInfo.from_bytes(book_xml.encode('utf-8'))
 349             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
 350                 return None
 351
 352     def refresh_dc_cache(self):
 353         update = {
 354             'catalogue_book_id': None,
 355             'dc_cover_image': None,
 356         }
 357
 358         info = self.book_info()
 359         if info is not None:
 360             update['catalogue_book_id'] = info.url.slug
 361             if info.cover_source:
 362                 try:
 363                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
 364                 except:
 365                     pass
 366                 else:
 367                     if info.cover_source == image.get_full_url():
 368                         update['dc_cover_image'] = image
 369         Book.objects.filter(pk=self.pk).update(**update)
 370
 371     def touch(self):
 372         update = {
 373             "_new_publishable": self.is_new_publishable(),
 374             "_published": self.is_published(),
 375             "_single": self.is_single(),
 376             "_on_track": self.get_on_track(),
 377         }
 378         Book.objects.filter(pk=self.pk).update(**update)
 379         self.refresh_dc_cache()
 380
 381     # Materializing & publishing
 382     # ==========================
 383
 384     def get_current_changes(self, publishable=True):
 385         """
 386             Returns a list containing one Change for every Chunk in the Book.
 387             Takes the most recent revision (publishable, if set).
 388             Throws an error, if a proper revision is unavailable for a Chunk.
 389         """
 390         if publishable:
 391             changes = [chunk.publishable() for chunk in self]
 392         else:
 393             changes = [chunk.head for chunk in self if chunk.head is not None]
 394         if None in changes:
 395             raise self.NoTextError('Some chunks have no available text.')
 396         return changes
 397
 398     def materialize(self, publishable=False, changes=None):
 399         """
 400             Get full text of the document compiled from chunks.
 401             Takes the current versions of all texts
 402             or versions most recently tagged for publishing,
 403             or a specified iterable changes.
 404         """
 405         if changes is None:
 406             changes = self.get_current_changes(publishable)
 407         return compile_text(change.materialize() for change in changes)
 408
 409     def wldocument(self, publishable=True, changes=None,
 410             parse_dublincore=True, strict=False):
 411         from documents.ebook_utils import RedakcjaDocProvider
 412         from librarian.parser import WLDocument
 413
 414         return WLDocument.from_bytes(
 415                 self.materialize(publishable=publishable, changes=changes).encode('utf-8'),
 416                 provider=RedakcjaDocProvider(publishable=publishable),
 417                 parse_dublincore=parse_dublincore,
 418                 strict=strict)
 419
 420     def publish(self, user, fake=False, host=None, days=0, beta=False, hidden=False):
 421         """
 422             Publishes a book on behalf of a (local) user.
 423         """
 424         self.assert_publishable()
 425         changes = self.get_current_changes(publishable=True)
 426         if not fake:
 427             book_xml = self.materialize(changes=changes)
 428             data = {"book_xml": book_xml, "days": days, "hidden": hidden}
 429             if host:
 430                 data['gallery_url'] = host + self.gallery_url()
 431             apiclient.api_call(user, "books/", data, beta=beta)
 432         if not beta:
 433             # record the publish
 434             br = BookPublishRecord.objects.create(book=self, user=user)
 435             for c in changes:
 436                 ChunkPublishRecord.objects.create(book_record=br, change=c)
 437             if not self.public and days == 0:
 438                 self.public = True
 439                 self.save()
 440             if self.public and days > 0:
 441                 self.public = False
 442                 self.save()
 443             post_publish.send(sender=br)
 444
 445     def latex_dir(self):
 446         doc = self.wldocument()
 447         return doc.latex_dir(cover=True, ilustr_path=self.gallery_path())