Optimization.
[redakcja.git] / src / documents / models / book.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from django.apps import apps
5 from django.core.files.base import ContentFile
6 from django.contrib.sites.models import Site
7 from django.db import connection, models, transaction
8 from django.template.loader import render_to_string
9 from django.urls import reverse
10 from django.utils.translation import gettext_lazy as _
11 from django.conf import settings
12 from slugify import slugify
13 from librarian.cover import make_cover
14 from librarian.dcparser import BookInfo
15
16 import apiclient
17 from documents.helpers import cached_in_field, GalleryMerger
18 from documents.models import BookPublishRecord, ChunkPublishRecord, Project
19 from documents.signals import post_publish
20 from documents.xml_tools import compile_text, split_xml
21 from cover.models import Image
22 from io import BytesIO
23 import os
24 import shutil
25 import re
26
27
class Book(models.Model):
    """ A document edited on the wiki.

    The methods of this class treat a book as a sequence of chunks
    reachable through the ``chunk_set`` reverse relation.
    """

    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    # Non-public books are filtered out for anonymous users
    # (see get_visible_for() and accessible()).
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Joined with settings.IMAGE_DIR by gallery_path() and gallery_url().
    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
    project = models.ForeignKey(Project, models.SET_NULL, null=True, blank=True)

    parent = models.ForeignKey('self', models.SET_NULL, null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # The four underscore-prefixed fields are written by touch() and read
    # through the cached_in_field() wrappers defined further down.
    _single = models.BooleanField(editable=False, null=True, db_index=True)
    _new_publishable = models.BooleanField(editable=False, null=True)
    _published = models.BooleanField(editable=False, null=True)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # dc_cover_image, dc and catalogue_book are written by refresh_dc_cache().
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    dc = models.JSONField(null=True, editable=False)
    # Written by build_cover().
    cover = models.FileField(blank=True, upload_to='documents/cover')
    catalogue_book = models.ForeignKey(
        'catalogue.Book',
        models.DO_NOTHING,
        to_field='slug',
        null=True, blank=True,
        db_constraint=False,
        editable=False, db_index=True,
        related_name='document_books',
        related_query_name='document_book',
    )
    legimi_id = models.CharField(max_length=255, blank=True)

    # NOTE: derives from BaseException, not Exception, so handlers written
    # as `except Exception:` do not catch it.
    class NoTextError(BaseException):
        pass

    class Meta:
        app_label = 'documents'
        ordering = ['title', 'slug']
        verbose_name = _('book')
        verbose_name_plural = _('books')
69
70     @classmethod
71     def get_visible_for(cls, user):
72         qs = cls.objects.all()
73         if not user.is_authenticated:
74             qs = qs.filter(public=True)
75         return qs
76
77     @staticmethod
78     def q_dc(field, field_plural, value, prefix=''):
79         if connection.features.supports_json_field_contains:
80             return models.Q(**{f'{prefix}dc__{field_plural}__contains': value})
81         else:
82             return models.Q(**{f'{prefix}dc__{field}': value})
83             
84     
85     # Representing
86     # ============
87
88     def __iter__(self):
89         return iter(self.chunk_set.all())
90
91     def __getitem__(self, chunk):
92         return self.chunk_set.all()[chunk]
93
94     def __len__(self):
95         return self.chunk_set.count()
96
97     def __bool__(self):
98         """
99             Necessary so that __len__ isn't used for bool evaluation.
100         """
101         return True
102
103     def __str__(self):
104         return self.title
105
106     def get_absolute_url(self):
107         return reverse("documents_book", args=[self.slug])
108
109     def correct_about(self):
110         return "http://%s%s" % (
111             Site.objects.get_current().domain,
112             self.get_absolute_url()
113         )
114
115     def gallery_path(self):
116         return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.gallery)
117
118     def gallery_url(self):
119         return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, self.gallery)
120
121     # Creating & manipulating
122     # =======================
123
124     def accessible(self, request):
125         return self.public or request.user.is_authenticated
126
127     @classmethod
128     @transaction.atomic
129     def create(cls, creator, text, *args, **kwargs):
130         b = cls.objects.create(*args, **kwargs)
131         b.chunk_set.all().update(creator=creator)
132         b[0].commit(text, author=creator)
133         return b
134
135     def add(self, *args, **kwargs):
136         """Add a new chunk at the end."""
137         return self.chunk_set.reverse()[0].split(*args, **kwargs)
138
139     @classmethod
140     @transaction.atomic
141     def import_xml_text(cls, text=u'', previous_book=None,
142                 commit_args=None, **kwargs):
143         """Imports a book from XML, splitting it into chunks as necessary."""
144         texts = split_xml(text)
145         if previous_book:
146             instance = previous_book
147         else:
148             instance = cls(**kwargs)
149             instance.save()
150
151         # if there are more parts, set the rest to empty strings
152         book_len = len(instance)
153         for i in range(book_len - len(texts)):
154             texts.append((u'pusta część %d' % (i + 1), u''))
155
156         i = 0
157         for i, (title, text) in enumerate(texts):
158             if not title:
159                 title = u'część %d' % (i + 1)
160
161             slug = slugify(title)
162
163             if i < book_len:
164                 chunk = instance[i]
165                 chunk.slug = slug[:50]
166                 chunk.title = title[:255]
167                 chunk.save()
168             else:
169                 chunk = instance.add(slug, title)
170
171             chunk.commit(text, **commit_args)
172
173         return instance
174
175     def make_chunk_slug(self, proposed):
176         """ 
177             Finds a chunk slug not yet used in the book.
178         """
179         slugs = set(c.slug for c in self)
180         i = 1
181         new_slug = proposed[:50]
182         while new_slug in slugs:
183             new_slug = "%s_%d" % (proposed[:45], i)
184             i += 1
185         return new_slug
186
187     @transaction.atomic
188     def append(self, other, slugs=None, titles=None):
189         """Add all chunks of another book to self."""
190         assert self != other
191
192         number = self[len(self) - 1].number + 1
193         len_other = len(other)
194         single = len_other == 1
195
196         if slugs is not None:
197             assert len(slugs) == len_other
198         if titles is not None:
199             assert len(titles) == len_other
200             if slugs is None:
201                 slugs = [slugify(t) for t in titles]
202
203         for i, chunk in enumerate(other):
204             # move chunk to new book
205             chunk.book = self
206             chunk.number = number
207
208             if titles is None:
209                 # try some title guessing
210                 if other.title.startswith(self.title):
211                     other_title_part = other.title[len(self.title):].lstrip(' /')
212                 else:
213                     other_title_part = other.title
214
215                 if single:
216                     # special treatment for appending one-parters:
217                     # just use the guessed title and original book slug
218                     chunk.title = other_title_part
219                     if other.slug.startswith(self.slug):
220                         chunk.slug = other.slug[len(self.slug):].lstrip('-_')
221                     else:
222                         chunk.slug = other.slug
223                 else:
224                     chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
225             else:
226                 chunk.slug = slugs[i]
227                 chunk.title = titles[i]
228
229             chunk.slug = self.make_chunk_slug(chunk.slug)
230             chunk.save()
231             number += 1
232         assert not other.chunk_set.exists()
233
234         gm = GalleryMerger(self.gallery, other.gallery)
235         self.gallery = gm.merge()
236
237         # and move the gallery starts
238         if gm.was_merged:
239                 for chunk in self[len(self) - len_other:]:
240                         old_start = chunk.gallery_start or 1
241                         chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
242                         chunk.save()
243
244         other.delete()
245
246
247     @transaction.atomic
248     def prepend_history(self, other):
249         """Prepend history from all the other book's chunks to own."""
250         assert self != other
251
252         for i in range(len(self), len(other)):
253             title = u"pusta część %d" % i
254             chunk = self.add(slugify(title), title)
255             chunk.commit('')
256
257         for i in range(len(other)):
258             self[i].prepend_history(other[0])
259
260         assert not other.chunk_set.exists()
261         other.delete()
262
263     def split(self):
264         """Splits all the chunks into separate books."""
265         self.title
266         for chunk in self:
267             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
268                     public=self.public, gallery=self.gallery)
269             book[0].delete()
270             chunk.book = book
271             chunk.number = 1
272             chunk.save()
273         assert not self.chunk_set.exists()
274         self.delete()
275
276     # State & cache
277     # =============
278
279     def last_published(self):
280         try:
281             return self.publish_log.all()[0].timestamp
282         except IndexError:
283             return None
284
285     def last_legimi_publish(self):
286         return self.legimibookpublish_set.order_by('-created_at').first()
287
288     def assert_publishable(self):
289         assert self.chunk_set.exists(), _('No chunks in the book.')
290         try:
291             changes = self.get_current_changes(publishable=True)
292         except self.NoTextError:
293             raise AssertionError(_('Not all chunks have publishable revisions.'))
294
295         from librarian import NoDublinCore, ParseError, ValidationError
296
297         try:
298             bi = self.wldocument(changes=changes, strict=True).book_info
299         except ParseError as e:
300             raise AssertionError(_('Invalid XML') + ': ' + str(e))
301         except NoDublinCore:
302             raise AssertionError(_('No Dublin Core found.'))
303         except ValidationError as e:
304             raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e))
305
306         valid_about = self.correct_about()
307         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
308
309     def publishable_error(self):
310         try:
311             return self.assert_publishable()
312         except AssertionError as e:
313             return e
314         else:
315             return None
316
317     def hidden(self):
318         return self.slug.startswith('.')
319
320     def is_new_publishable(self):
321         """Checks if book is ready for publishing.
322
323         Returns True if there is a publishable version newer than the one
324         already published.
325
326         """
327         new_publishable = False
328         if not self.chunk_set.exists():
329             return False
330         for chunk in self:
331             change = chunk.publishable()
332             if not change:
333                 return False
334             if not new_publishable and not change.publish_log.exists():
335                 new_publishable = True
336         return new_publishable
337     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
338
339     def is_published(self):
340         return self.publish_log.exists()
341     published = cached_in_field('_published')(is_published)
342
343     def get_on_track(self):
344         if self.published:
345             return -1
346         stages = [ch.stage.ordering if ch.stage is not None else 0
347                     for ch in self]
348         if not len(stages):
349             return 0
350         return min(stages)
351     on_track = cached_in_field('_on_track')(get_on_track)
352
353     def is_single(self):
354         return len(self) == 1
355     single = cached_in_field('_single')(is_single)
356
357     def book_info(self, publishable=True):
358         try:
359             book_xml = self.materialize(publishable=publishable)
360         except self.NoTextError:
361             pass
362         else:
363             from librarian.dcparser import BookInfo
364             from librarian import NoDublinCore, ParseError, ValidationError
365             try:
366                 return BookInfo.from_bytes(book_xml.encode('utf-8'))
367             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
368                 return None
369
370     def refresh_dc_cache(self):
371         update = {
372             'catalogue_book_id': None,
373             'dc_cover_image': None,
374         }
375
376         info = self.book_info()
377         if info is not None:
378             update['catalogue_book_id'] = info.url.slug
379             if info.cover_source:
380                 try:
381                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
382                 except:
383                     pass
384                 else:
385                     if info.cover_source == image.get_full_url():
386                         update['dc_cover_image'] = image
387             update['dc'] = info.to_dict()
388         Book.objects.filter(pk=self.pk).update(**update)
389
390     def touch(self):
391         update = {
392             "_new_publishable": self.is_new_publishable(),
393             "_published": self.is_published(),
394             "_single": self.is_single(),
395             "_on_track": self.get_on_track(),
396         }
397         Book.objects.filter(pk=self.pk).update(**update)
398         self.refresh_dc_cache()
399         self.build_cover()
400
401     def build_cover(self):
402         width, height = 212, 300
403         try:
404             xml = self.materialize(publishable=True).encode('utf-8')
405             info = BookInfo.from_bytes(xml)
406             cover = make_cover(info, width=width, height=height)
407             out = BytesIO()
408             ext = cover.ext()
409             cover.save(out)
410             self.cover.save(f'{self.slug}.{ext}', out, save=False)
411             type(self).objects.filter(pk=self.pk).update(cover=self.cover)
412         except:
413             type(self).objects.filter(pk=self.pk).update(cover='')
414
415     # Materializing & publishing
416     # ==========================
417
418     def get_current_changes(self, publishable=True):
419         """
420             Returns a list containing one Change for every Chunk in the Book.
421             Takes the most recent revision (publishable, if set).
422             Throws an error, if a proper revision is unavailable for a Chunk.
423         """
424         if publishable:
425             changes = [chunk.publishable() for chunk in self]
426         else:
427             changes = [chunk.head for chunk in self if chunk.head is not None]
428         if None in changes:
429             raise self.NoTextError('Some chunks have no available text.')
430         return changes
431
432     def materialize(self, publishable=False, changes=None):
433         """ 
434             Get full text of the document compiled from chunks.
435             Takes the current versions of all texts
436             or versions most recently tagged for publishing,
437             or a specified iterable changes.
438         """
439         if changes is None:
440             changes = self.get_current_changes(publishable)
441         return compile_text(change.materialize() for change in changes)
442
443     def wldocument(self, publishable=True, changes=None, 
444                    parse_dublincore=True, strict=False, librarian2=False):
445         from documents.ebook_utils import RedakcjaDocProvider
446         from librarian.parser import WLDocument
447         from librarian.document import WLDocument as WLDocument2
448
449         provider = RedakcjaDocProvider(publishable=publishable)
450         xml = self.materialize(publishable=publishable, changes=changes).encode('utf-8')
451         
452         if librarian2:
453             return WLDocument2(
454                 BytesIO(xml),
455                 provider=provider)
456         return WLDocument.from_bytes(
457                 xml,
458                 provider=provider,
459                 parse_dublincore=parse_dublincore,
460                 strict=strict)
461
462     def publish(self, user, fake=False, host=None, days=0, beta=False, hidden=False):
463         """
464             Publishes a book on behalf of a (local) user.
465         """
466         self.assert_publishable()
467         changes = self.get_current_changes(publishable=True)
468         if not fake:
469             book_xml = self.materialize(changes=changes)
470             data = {"book_xml": book_xml, "days": days, "hidden": hidden}
471             if host:
472                 data['gallery_url'] = host + self.gallery_url()
473             apiclient.api_call(user, "books/", data, beta=beta)
474         if not beta:
475             # record the publish
476             br = BookPublishRecord.objects.create(book=self, user=user)
477             for c in changes:
478                 ChunkPublishRecord.objects.create(book_record=br, change=c)
479             if not self.public and days == 0:
480                 self.public = True
481                 self.save()
482             if self.public and days > 0:
483                 self.public = False
484                 self.save()
485             post_publish.send(sender=br)
486
487     def latex_dir(self):
488         doc = self.wldocument()
489         return doc.latex_dir(cover=True, ilustr_path=self.gallery_path())