fix stats api
[redakcja.git] / src / documents / models / book.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from django.apps import apps
5 from django.core.files.base import ContentFile
6 from django.contrib.sites.models import Site
7 from django.db import connection, models, transaction
8 from django.template.loader import render_to_string
9 from django.urls import reverse
10 from django.utils.translation import gettext_lazy as _
11 from django.conf import settings
12 from slugify import slugify
13 from librarian.cover import make_cover
14 from librarian.dcparser import BookInfo
15
16 import apiclient
17 from documents.helpers import cached_in_field, GalleryMerger
18 from documents.models import BookPublishRecord, ChunkPublishRecord, Project
19 from documents.signals import post_publish
20 from documents.xml_tools import compile_text, split_xml
21 from cover.models import Image
22 from io import BytesIO
23 import os
24 import shutil
25 import re
26
27
28 class Book(models.Model):
29     """ A document edited on the wiki """
30
    # Basic descriptive fields; `slug` is unique and is what
    # get_absolute_url() passes to the URL resolver.
    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Directory name used by gallery_path() / gallery_url().
    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
    project = models.ForeignKey(Project, models.SET_NULL, null=True, blank=True)

    # Optional link to a parent book and this book's number under it;
    # neither is editable through forms.
    parent = models.ForeignKey('self', models.SET_NULL, null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # The underscore-prefixed fields back the cached_in_field() accessors
    # (single, new_publishable, published, on_track) and are rewritten by
    # touch(). refresh_dc_cache() fills catalogue_book, dc_cover_image and
    # dc; build_cover() fills cover.
    _single = models.BooleanField(editable=False, null=True, db_index=True)
    _new_publishable = models.BooleanField(editable=False, null=True)
    _published = models.BooleanField(editable=False, null=True)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    dc = models.JSONField(null=True, editable=False)
    cover = models.FileField(blank=True, upload_to='documents/cover')
    # Reference by slug to the catalogue's book; no database-level
    # constraint is created (db_constraint=False).
    catalogue_book = models.ForeignKey(
        'catalogue.Book',
        models.DO_NOTHING,
        to_field='slug',
        null=True, blank=True,
        db_constraint=False,
        editable=False, db_index=True,
        related_name='document_books',
        related_query_name='document_book',
    )
    # NOTE(review): presumably identifiers of this book in external
    # services named Legimi and Woblink -- confirm against their users.
    legimi_id = models.CharField(max_length=255, blank=True)
    woblink_id = models.CharField(max_length=255, blank=True)
61
    class NoTextError(BaseException):
        # Raised by get_current_changes() when a chunk has no usable revision.
        # NOTE(review): this derives from BaseException, not Exception, so
        # `except Exception` handlers do NOT catch it; callers must name it
        # explicitly.
        pass
64
    class Meta:
        # Model options: the model belongs to the 'documents' app and is
        # ordered by title, then slug, by default.
        app_label = 'documents'
        ordering = ['title', 'slug']
        verbose_name = _('book')
        verbose_name_plural = _('books')
70
71     @classmethod
72     def get_visible_for(cls, user):
73         qs = cls.objects.all()
74         if not user.is_authenticated:
75             qs = qs.filter(public=True)
76         return qs
77
78     @staticmethod
79     def q_dc(field, field_plural, value, prefix=''):
80         if connection.features.supports_json_field_contains:
81             return models.Q(**{f'{prefix}dc__{field_plural}__contains': value})
82         else:
83             return models.Q(**{f'{prefix}dc__{field}': value})
84             
85     
86     # Representing
87     # ============
88
89     def __iter__(self):
90         return iter(self.chunk_set.all())
91
92     def __getitem__(self, chunk):
93         return self.chunk_set.all()[chunk]
94
95     def __len__(self):
96         return self.chunk_set.count()
97
98     def __bool__(self):
99         """
100             Necessary so that __len__ isn't used for bool evaluation.
101         """
102         return True
103
104     def __str__(self):
105         return self.title
106
107     def get_absolute_url(self):
108         return reverse("documents_book", args=[self.slug])
109
110     def correct_about(self):
111         return "http://%s%s" % (
112             Site.objects.get_current().domain,
113             self.get_absolute_url()
114         )
115
116     def gallery_path(self):
117         return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.gallery)
118
119     def gallery_url(self):
120         return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, self.gallery)
121
122     # Creating & manipulating
123     # =======================
124
125     def accessible(self, request):
126         return self.public or request.user.is_authenticated
127
128     @classmethod
129     @transaction.atomic
130     def create(cls, creator, text, *args, **kwargs):
131         b = cls.objects.create(*args, **kwargs)
132         b.chunk_set.all().update(creator=creator)
133         b[0].commit(text, author=creator)
134         return b
135
136     def add(self, *args, **kwargs):
137         """Add a new chunk at the end."""
138         return self.chunk_set.reverse()[0].split(*args, **kwargs)
139
140     @classmethod
141     @transaction.atomic
142     def import_xml_text(cls, text=u'', previous_book=None,
143                 commit_args=None, **kwargs):
144         """Imports a book from XML, splitting it into chunks as necessary."""
145         texts = split_xml(text)
146         if previous_book:
147             instance = previous_book
148         else:
149             instance = cls(**kwargs)
150             instance.save()
151
152         # if there are more parts, set the rest to empty strings
153         book_len = len(instance)
154         for i in range(book_len - len(texts)):
155             texts.append((u'pusta część %d' % (i + 1), u''))
156
157         i = 0
158         for i, (title, text) in enumerate(texts):
159             if not title:
160                 title = u'część %d' % (i + 1)
161
162             slug = slugify(title)
163
164             if i < book_len:
165                 chunk = instance[i]
166                 chunk.slug = slug[:50]
167                 chunk.title = title[:255]
168                 chunk.save()
169             else:
170                 chunk = instance.add(slug, title)
171
172             chunk.commit(text, **commit_args)
173
174         return instance
175
176     def make_chunk_slug(self, proposed):
177         """ 
178             Finds a chunk slug not yet used in the book.
179         """
180         slugs = set(c.slug for c in self)
181         i = 1
182         new_slug = proposed[:50]
183         while new_slug in slugs:
184             new_slug = "%s_%d" % (proposed[:45], i)
185             i += 1
186         return new_slug
187
188     @transaction.atomic
189     def append(self, other, slugs=None, titles=None):
190         """Add all chunks of another book to self."""
191         assert self != other
192
193         number = self[len(self) - 1].number + 1
194         len_other = len(other)
195         single = len_other == 1
196
197         if slugs is not None:
198             assert len(slugs) == len_other
199         if titles is not None:
200             assert len(titles) == len_other
201             if slugs is None:
202                 slugs = [slugify(t) for t in titles]
203
204         for i, chunk in enumerate(other):
205             # move chunk to new book
206             chunk.book = self
207             chunk.number = number
208
209             if titles is None:
210                 # try some title guessing
211                 if other.title.startswith(self.title):
212                     other_title_part = other.title[len(self.title):].lstrip(' /')
213                 else:
214                     other_title_part = other.title
215
216                 if single:
217                     # special treatment for appending one-parters:
218                     # just use the guessed title and original book slug
219                     chunk.title = other_title_part
220                     if other.slug.startswith(self.slug):
221                         chunk.slug = other.slug[len(self.slug):].lstrip('-_')
222                     else:
223                         chunk.slug = other.slug
224                 else:
225                     chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
226             else:
227                 chunk.slug = slugs[i]
228                 chunk.title = titles[i]
229
230             chunk.slug = self.make_chunk_slug(chunk.slug)
231             chunk.save()
232             number += 1
233         assert not other.chunk_set.exists()
234
235         gm = GalleryMerger(self.gallery, other.gallery)
236         self.gallery = gm.merge()
237
238         # and move the gallery starts
239         if gm.was_merged:
240                 for chunk in self[len(self) - len_other:]:
241                         old_start = chunk.gallery_start or 1
242                         chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
243                         chunk.save()
244
245         other.delete()
246
247
248     @transaction.atomic
249     def prepend_history(self, other):
250         """Prepend history from all the other book's chunks to own."""
251         assert self != other
252
253         for i in range(len(self), len(other)):
254             title = u"pusta część %d" % i
255             chunk = self.add(slugify(title), title)
256             chunk.commit('')
257
258         for i in range(len(other)):
259             self[i].prepend_history(other[0])
260
261         assert not other.chunk_set.exists()
262         other.delete()
263
264     def split(self):
265         """Splits all the chunks into separate books."""
266         self.title
267         for chunk in self:
268             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
269                     public=self.public, gallery=self.gallery)
270             book[0].delete()
271             chunk.book = book
272             chunk.number = 1
273             chunk.save()
274         assert not self.chunk_set.exists()
275         self.delete()
276
277     # State & cache
278     # =============
279
280     def last_published(self):
281         try:
282             return self.publish_log.all()[0].timestamp
283         except IndexError:
284             return None
285
286     def assert_publishable(self):
287         assert self.chunk_set.exists(), _('No chunks in the book.')
288         try:
289             changes = self.get_current_changes(publishable=True)
290         except self.NoTextError:
291             raise AssertionError(_('Not all chunks have publishable revisions.'))
292
293         from librarian import NoDublinCore, ParseError, ValidationError
294
295         try:
296             bi = self.wldocument(changes=changes, strict=True).book_info
297         except ParseError as e:
298             raise AssertionError(_('Invalid XML') + ': ' + str(e))
299         except NoDublinCore:
300             raise AssertionError(_('No Dublin Core found.'))
301         except ValidationError as e:
302             raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e))
303
304         valid_about = self.correct_about()
305         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
306
307     def publishable_error(self):
308         try:
309             return self.assert_publishable()
310         except AssertionError as e:
311             return e
312         else:
313             return None
314
315     def hidden(self):
316         return self.slug.startswith('.')
317
318     def is_new_publishable(self):
319         """Checks if book is ready for publishing.
320
321         Returns True if there is a publishable version newer than the one
322         already published.
323
324         """
325         new_publishable = False
326         if not self.chunk_set.exists():
327             return False
328         for chunk in self:
329             change = chunk.publishable()
330             if not change:
331                 return False
332             if not new_publishable and not change.publish_log.exists():
333                 new_publishable = True
334         return new_publishable
335     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
336
337     def is_published(self):
338         return self.publish_log.exists()
339     published = cached_in_field('_published')(is_published)
340
341     def get_on_track(self):
342         if self.published:
343             return -1
344         stages = [ch.stage.ordering if ch.stage is not None else 0
345                     for ch in self]
346         if not len(stages):
347             return 0
348         return min(stages)
349     on_track = cached_in_field('_on_track')(get_on_track)
350
351     def is_single(self):
352         return len(self) == 1
353     single = cached_in_field('_single')(is_single)
354
355     def book_info(self, publishable=True):
356         try:
357             book_xml = self.materialize(publishable=publishable)
358         except self.NoTextError:
359             pass
360         else:
361             from librarian.dcparser import BookInfo
362             from librarian import NoDublinCore, ParseError, ValidationError
363             try:
364                 return BookInfo.from_bytes(book_xml.encode('utf-8'))
365             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
366                 return None
367
368     def refresh_dc_cache(self):
369         update = {
370             'catalogue_book_id': None,
371             'dc_cover_image': None,
372         }
373
374         info = self.book_info()
375         if info is not None:
376             update['catalogue_book_id'] = info.url.slug
377             if info.cover_source:
378                 try:
379                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
380                 except:
381                     pass
382                 else:
383                     if info.cover_source == image.get_full_url():
384                         update['dc_cover_image'] = image
385             update['dc'] = info.to_dict()
386         Book.objects.filter(pk=self.pk).update(**update)
387
388     def touch(self):
389         update = {
390             "_new_publishable": self.is_new_publishable(),
391             "_published": self.is_published(),
392             "_single": self.is_single(),
393             "_on_track": self.get_on_track(),
394         }
395         Book.objects.filter(pk=self.pk).update(**update)
396         self.refresh_dc_cache()
397         self.build_cover()
398
399     def build_cover(self):
400         width, height = 212, 300
401         try:
402             xml = self.materialize(publishable=True).encode('utf-8')
403             info = BookInfo.from_bytes(xml)
404             cover = make_cover(info, width=width, height=height)
405             out = BytesIO()
406             ext = cover.ext()
407             cover.save(out)
408             self.cover.save(f'{self.slug}.{ext}', out, save=False)
409             type(self).objects.filter(pk=self.pk).update(cover=self.cover)
410         except:
411             type(self).objects.filter(pk=self.pk).update(cover='')
412
413     # Materializing & publishing
414     # ==========================
415
416     def get_current_changes(self, publishable=True):
417         """
418             Returns a list containing one Change for every Chunk in the Book.
419             Takes the most recent revision (publishable, if set).
420             Throws an error, if a proper revision is unavailable for a Chunk.
421         """
422         if publishable:
423             changes = [chunk.publishable() for chunk in self]
424         else:
425             changes = [chunk.head for chunk in self if chunk.head is not None]
426         if None in changes:
427             raise self.NoTextError('Some chunks have no available text.')
428         return changes
429
430     def materialize(self, publishable=False, changes=None):
431         """ 
432             Get full text of the document compiled from chunks.
433             Takes the current versions of all texts
434             or versions most recently tagged for publishing,
435             or a specified iterable changes.
436         """
437         if changes is None:
438             changes = self.get_current_changes(publishable)
439         return compile_text(change.materialize() for change in changes)
440
441     def wldocument(self, publishable=True, changes=None, 
442                    parse_dublincore=True, strict=False, librarian2=False):
443         from documents.ebook_utils import RedakcjaDocProvider
444         from librarian.parser import WLDocument
445         from librarian.document import WLDocument as WLDocument2
446
447         provider = RedakcjaDocProvider(publishable=publishable)
448         xml = self.materialize(publishable=publishable, changes=changes).encode('utf-8')
449         
450         if librarian2:
451             return WLDocument2(
452                 BytesIO(xml),
453                 provider=provider)
454         return WLDocument.from_bytes(
455                 xml,
456                 provider=provider,
457                 parse_dublincore=parse_dublincore,
458                 strict=strict)
459
460     def publish(self, user, fake=False, host=None, days=0, beta=False, hidden=False):
461         """
462             Publishes a book on behalf of a (local) user.
463         """
464         self.assert_publishable()
465         changes = self.get_current_changes(publishable=True)
466         if not fake:
467             book_xml = self.materialize(changes=changes)
468             data = {"book_xml": book_xml, "days": days, "hidden": hidden}
469             if host:
470                 data['gallery_url'] = host + self.gallery_url()
471             apiclient.api_call(user, "books/", data, beta=beta)
472         if not beta:
473             # record the publish
474             br = BookPublishRecord.objects.create(book=self, user=user)
475             for c in changes:
476                 ChunkPublishRecord.objects.create(book_record=br, change=c)
477             if not self.public and days == 0:
478                 self.public = True
479                 self.save()
480             if self.public and days > 0:
481                 self.public = False
482                 self.save()
483             post_publish.send(sender=br)
484
485     def latex_dir(self):
486         doc = self.wldocument()
487         return doc.latex_dir(cover=True, ilustr_path=self.gallery_path())