src/documents/models/book.py

   1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from django.apps import apps
   5 from django.contrib.sites.models import Site
   6 from django.db import models, transaction
   7 from django.template.loader import render_to_string
   8 from django.urls import reverse
   9 from django.utils.translation import ugettext_lazy as _
  10 from django.conf import settings
  11 from slugify import slugify
  12
  13
  14 import apiclient
  15 from documents.helpers import cached_in_field, GalleryMerger
  16 from documents.models import BookPublishRecord, ChunkPublishRecord, Project
  17 from documents.signals import post_publish
  18 from documents.xml_tools import compile_text, split_xml
  19 from cover.models import Image
  20 from io import BytesIO
  21 import os
  22 import shutil
  23 import re
  24
class Book(models.Model):
    """A document edited on the wiki.

    A book owns an ordered set of chunks (reached through `chunk_set`);
    the sequence protocol methods below iterate, index and count them.
    """

    # Editable descriptive fields.
    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    # Non-public books are hidden from unauthenticated users (see get_visible_for).
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Directory name of the scan gallery, relative to MEDIA_ROOT/IMAGE_DIR.
    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
    project = models.ForeignKey(Project, models.SET_NULL, null=True, blank=True)

    # Optional parent book and this book's position under it; not editable in forms.
    parent = models.ForeignKey('self', models.SET_NULL, null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # These columns are rewritten by touch() and refresh_dc_cache();
    # the underscore-prefixed ones back the cached_in_field accessors.
    _single = models.BooleanField(editable=False, null=True, db_index=True)
    _new_publishable = models.BooleanField(editable=False, null=True)
    _published = models.BooleanField(editable=False, null=True)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # Cover image resolved from the document's metadata (see refresh_dc_cache).
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    # Link by slug to the catalogue entry; no database-level constraint,
    # so the referenced catalogue book is not guaranteed to exist.
    catalogue_book = models.ForeignKey(
        'catalogue.Book',
        models.DO_NOTHING,
        to_field='slug',
        null=True, blank=True,
        db_constraint=False,
        editable=False, db_index=True,
        related_name='document_books',
        related_query_name='document_book',
    )
  54
  55     class NoTextError(BaseException):
  56         pass
  57
  58     class Meta:
  59         app_label = 'documents'
  60         ordering = ['title', 'slug']
  61         verbose_name = _('book')
  62         verbose_name_plural = _('books')
  63
  64     @classmethod
  65     def get_visible_for(cls, user):
  66         qs = cls.objects.all()
  67         if not user.is_authenticated:
  68             qs = qs.filter(public=True)
  69         return qs
  70
  71     # Representing
  72     # ============
  73
  74     def __iter__(self):
  75         return iter(self.chunk_set.all())
  76
  77     def __getitem__(self, chunk):
  78         return self.chunk_set.all()[chunk]
  79
  80     def __len__(self):
  81         return self.chunk_set.count()
  82
  83     def __bool__(self):
  84         """
  85             Necessary so that __len__ isn't used for bool evaluation.
  86         """
  87         return True
  88
  89     def __str__(self):
  90         return self.title
  91
  92     def get_absolute_url(self):
  93         return reverse("documents_book", args=[self.slug])
  94
  95     def correct_about(self):
  96         return "http://%s%s" % (
  97             Site.objects.get_current().domain,
  98             self.get_absolute_url()
  99         )
 100
 101     def gallery_path(self):
 102         return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.gallery)
 103
 104     def gallery_url(self):
 105         return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, self.gallery)
 106
 107     # Creating & manipulating
 108     # =======================
 109
 110     def accessible(self, request):
 111         return self.public or request.user.is_authenticated
 112
 113     @classmethod
 114     @transaction.atomic
 115     def create(cls, creator, text, *args, **kwargs):
 116         b = cls.objects.create(*args, **kwargs)
 117         b.chunk_set.all().update(creator=creator)
 118         b[0].commit(text, author=creator)
 119         return b
 120
 121     def add(self, *args, **kwargs):
 122         """Add a new chunk at the end."""
 123         return self.chunk_set.reverse()[0].split(*args, **kwargs)
 124
 125     @classmethod
 126     @transaction.atomic
 127     def import_xml_text(cls, text=u'', previous_book=None,
 128                 commit_args=None, **kwargs):
 129         """Imports a book from XML, splitting it into chunks as necessary."""
 130         texts = split_xml(text)
 131         if previous_book:
 132             instance = previous_book
 133         else:
 134             instance = cls(**kwargs)
 135             instance.save()
 136
 137         # if there are more parts, set the rest to empty strings
 138         book_len = len(instance)
 139         for i in range(book_len - len(texts)):
 140             texts.append((u'pusta część %d' % (i + 1), u''))
 141
 142         i = 0
 143         for i, (title, text) in enumerate(texts):
 144             if not title:
 145                 title = u'część %d' % (i + 1)
 146
 147             slug = slugify(title)
 148
 149             if i < book_len:
 150                 chunk = instance[i]
 151                 chunk.slug = slug[:50]
 152                 chunk.title = title[:255]
 153                 chunk.save()
 154             else:
 155                 chunk = instance.add(slug, title)
 156
 157             chunk.commit(text, **commit_args)
 158
 159         return instance
 160
 161     def make_chunk_slug(self, proposed):
 162         """
 163             Finds a chunk slug not yet used in the book.
 164         """
 165         slugs = set(c.slug for c in self)
 166         i = 1
 167         new_slug = proposed[:50]
 168         while new_slug in slugs:
 169             new_slug = "%s_%d" % (proposed[:45], i)
 170             i += 1
 171         return new_slug
 172
    @transaction.atomic
    def append(self, other, slugs=None, titles=None):
        """Add all chunks of another book to self.

        The chunks of `other` are moved to the end of this book and
        renumbered, the two scan galleries are merged, and `other` is
        deleted. Runs in a single transaction.

        Args:
            other: the book whose chunks are taken over; must differ from self.
            slugs: optional list of new chunk slugs, one per chunk of `other`.
            titles: optional list of new chunk titles, one per chunk of
                `other`. When given without `slugs`, slugs are derived from
                the titles with `slugify`.

        NOTE(review): `slugs` is only consulted when `titles` is given;
        with `titles=None` the title/slug guessing branch runs and any
        passed `slugs` are ignored -- confirm this is intended.
        """
        assert self != other

        # Numbering of the moved chunks continues after this book's last chunk.
        number = self[len(self) - 1].number + 1
        len_other = len(other)
        single = len_other == 1

        if slugs is not None:
            assert len(slugs) == len_other
        if titles is not None:
            assert len(titles) == len_other
            if slugs is None:
                slugs = [slugify(t) for t in titles]

        for i, chunk in enumerate(other):
            # move chunk to new book
            chunk.book = self
            chunk.number = number

            if titles is None:
                # try some title guessing
                # (strip this book's title when it prefixes the other title)
                if other.title.startswith(self.title):
                    other_title_part = other.title[len(self.title):].lstrip(' /')
                else:
                    other_title_part = other.title

                if single:
                    # special treatment for appending one-parters:
                    # just use the guessed title and original book slug
                    chunk.title = other_title_part
                    if other.slug.startswith(self.slug):
                        chunk.slug = other.slug[len(self.slug):].lstrip('-_')
                    else:
                        chunk.slug = other.slug
                else:
                    # multi-part book: prefix each chunk title with the
                    # guessed book title; the chunk keeps its own slug
                    chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
            else:
                chunk.slug = slugs[i]
                chunk.title = titles[i]

            # make sure the slug does not clash with chunks already in this book
            chunk.slug = self.make_chunk_slug(chunk.slug)
            chunk.save()
            number += 1
        assert not other.chunk_set.exists()

        # NOTE(review): self.gallery is reassigned here, but this method never
        # calls self.save() -- confirm the new gallery name is persisted elsewhere.
        gm = GalleryMerger(self.gallery, other.gallery)
        self.gallery = gm.merge()

        # and move the gallery starts
        # (only the chunks that were just appended are shifted)
        if gm.was_merged:
                for chunk in self[len(self) - len_other:]:
                        old_start = chunk.gallery_start or 1
                        chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
                        chunk.save()

        other.delete()
 231
 232
 233     @transaction.atomic
 234     def prepend_history(self, other):
 235         """Prepend history from all the other book's chunks to own."""
 236         assert self != other
 237
 238         for i in range(len(self), len(other)):
 239             title = u"pusta część %d" % i
 240             chunk = self.add(slugify(title), title)
 241             chunk.commit('')
 242
 243         for i in range(len(other)):
 244             self[i].prepend_history(other[0])
 245
 246         assert not other.chunk_set.exists()
 247         other.delete()
 248
 249     def split(self):
 250         """Splits all the chunks into separate books."""
 251         self.title
 252         for chunk in self:
 253             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
 254                     public=self.public, gallery=self.gallery)
 255             book[0].delete()
 256             chunk.book = book
 257             chunk.number = 1
 258             chunk.save()
 259         assert not self.chunk_set.exists()
 260         self.delete()
 261
 262     # State & cache
 263     # =============
 264
 265     def last_published(self):
 266         try:
 267             return self.publish_log.all()[0].timestamp
 268         except IndexError:
 269             return None
 270
 271     def assert_publishable(self):
 272         assert self.chunk_set.exists(), _('No chunks in the book.')
 273         try:
 274             changes = self.get_current_changes(publishable=True)
 275         except self.NoTextError:
 276             raise AssertionError(_('Not all chunks have publishable revisions.'))
 277
 278         from librarian import NoDublinCore, ParseError, ValidationError
 279
 280         try:
 281             bi = self.wldocument(changes=changes, strict=True).book_info
 282         except ParseError as e:
 283             raise AssertionError(_('Invalid XML') + ': ' + str(e))
 284         except NoDublinCore:
 285             raise AssertionError(_('No Dublin Core found.'))
 286         except ValidationError as e:
 287             raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e))
 288
 289         valid_about = self.correct_about()
 290         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
 291
 292     def publishable_error(self):
 293         try:
 294             return self.assert_publishable()
 295         except AssertionError as e:
 296             return e
 297         else:
 298             return None
 299
 300     def hidden(self):
 301         return self.slug.startswith('.')
 302
 303     def is_new_publishable(self):
 304         """Checks if book is ready for publishing.
 305
 306         Returns True if there is a publishable version newer than the one
 307         already published.
 308
 309         """
 310         new_publishable = False
 311         if not self.chunk_set.exists():
 312             return False
 313         for chunk in self:
 314             change = chunk.publishable()
 315             if not change:
 316                 return False
 317             if not new_publishable and not change.publish_log.exists():
 318                 new_publishable = True
 319         return new_publishable
    # Accessor built by wrapping is_new_publishable with cached_in_field;
    # presumably it serves the value from the `_new_publishable` column
    # (see documents.helpers.cached_in_field) -- verify.
    new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
 321
 322     def is_published(self):
 323         return self.publish_log.exists()
    # Accessor built by wrapping is_published with cached_in_field;
    # presumably it serves the value from the `_published` column -- verify.
    published = cached_in_field('_published')(is_published)
 325
 326     def get_on_track(self):
 327         if self.published:
 328             return -1
 329         stages = [ch.stage.ordering if ch.stage is not None else 0
 330                     for ch in self]
 331         if not len(stages):
 332             return 0
 333         return min(stages)
    # Accessor built by wrapping get_on_track with cached_in_field;
    # presumably it serves the value from the `_on_track` column -- verify.
    on_track = cached_in_field('_on_track')(get_on_track)
 335
 336     def is_single(self):
 337         return len(self) == 1
    # Accessor built by wrapping is_single with cached_in_field;
    # presumably it serves the value from the `_single` column -- verify.
    single = cached_in_field('_single')(is_single)
 339
 340     def book_info(self, publishable=True):
 341         try:
 342             book_xml = self.materialize(publishable=publishable)
 343         except self.NoTextError:
 344             pass
 345         else:
 346             from librarian.dcparser import BookInfo
 347             from librarian import NoDublinCore, ParseError, ValidationError
 348             try:
 349                 return BookInfo.from_bytes(book_xml.encode('utf-8'))
 350             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
 351                 return None
 352
 353     def refresh_dc_cache(self):
 354         update = {
 355             'catalogue_book_id': None,
 356             'dc_cover_image': None,
 357         }
 358
 359         info = self.book_info()
 360         if info is not None:
 361             update['catalogue_book_id'] = info.url.slug
 362             if info.cover_source:
 363                 try:
 364                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
 365                 except:
 366                     pass
 367                 else:
 368                     if info.cover_source == image.get_full_url():
 369                         update['dc_cover_image'] = image
 370         Book.objects.filter(pk=self.pk).update(**update)
 371
 372     def touch(self):
 373         update = {
 374             "_new_publishable": self.is_new_publishable(),
 375             "_published": self.is_published(),
 376             "_single": self.is_single(),
 377             "_on_track": self.get_on_track(),
 378         }
 379         Book.objects.filter(pk=self.pk).update(**update)
 380         self.refresh_dc_cache()
 381
 382     # Materializing & publishing
 383     # ==========================
 384
 385     def get_current_changes(self, publishable=True):
 386         """
 387             Returns a list containing one Change for every Chunk in the Book.
 388             Takes the most recent revision (publishable, if set).
 389             Throws an error, if a proper revision is unavailable for a Chunk.
 390         """
 391         if publishable:
 392             changes = [chunk.publishable() for chunk in self]
 393         else:
 394             changes = [chunk.head for chunk in self if chunk.head is not None]
 395         if None in changes:
 396             raise self.NoTextError('Some chunks have no available text.')
 397         return changes
 398
 399     def materialize(self, publishable=False, changes=None):
 400         """
 401             Get full text of the document compiled from chunks.
 402             Takes the current versions of all texts
 403             or versions most recently tagged for publishing,
 404             or a specified iterable changes.
 405         """
 406         if changes is None:
 407             changes = self.get_current_changes(publishable)
 408         return compile_text(change.materialize() for change in changes)
 409
 410     def wldocument(self, publishable=True, changes=None,
 411                    parse_dublincore=True, strict=False, librarian2=False):
 412         from documents.ebook_utils import RedakcjaDocProvider
 413         from librarian.parser import WLDocument
 414         from librarian.document import WLDocument as WLDocument2
 415
 416         provider = RedakcjaDocProvider(publishable=publishable)
 417         xml = self.materialize(publishable=publishable, changes=changes).encode('utf-8')
 418
 419         if librarian2:
 420             return WLDocument2(
 421                 BytesIO(xml),
 422                 provider=provider)
 423         return WLDocument.from_bytes(
 424                 xml,
 425                 provider=provider,
 426                 parse_dublincore=parse_dublincore,
 427                 strict=strict)
 428
 429     def publish(self, user, fake=False, host=None, days=0, beta=False, hidden=False):
 430         """
 431             Publishes a book on behalf of a (local) user.
 432         """
 433         self.assert_publishable()
 434         changes = self.get_current_changes(publishable=True)
 435         if not fake:
 436             book_xml = self.materialize(changes=changes)
 437             data = {"book_xml": book_xml, "days": days, "hidden": hidden}
 438             if host:
 439                 data['gallery_url'] = host + self.gallery_url()
 440             apiclient.api_call(user, "books/", data, beta=beta)
 441         if not beta:
 442             # record the publish
 443             br = BookPublishRecord.objects.create(book=self, user=user)
 444             for c in changes:
 445                 ChunkPublishRecord.objects.create(book_record=br, change=c)
 446             if not self.public and days == 0:
 447                 self.public = True
 448                 self.save()
 449             if self.public and days > 0:
 450                 self.public = False
 451                 self.save()
 452             post_publish.send(sender=br)
 453
 454     def latex_dir(self):
 455         doc = self.wldocument()
 456         return doc.latex_dir(cover=True, ilustr_path=self.gallery_path())