Allow multiple sources.
[redakcja.git] / src / documents / models / book.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from django.apps import apps
5 from django.core.files.base import ContentFile
6 from django.contrib.sites.models import Site
7 from django.db import connection, models, transaction
8 from django.template.loader import render_to_string
9 from django.urls import reverse
10 from django.utils.translation import gettext_lazy as _
11 from django.conf import settings
12 from slugify import slugify
13 from librarian.cover import make_cover
14 from librarian.dcparser import BookInfo
15
16 import apiclient
17 from documents.helpers import cached_in_field, GalleryMerger
18 from documents.models import BookPublishRecord, ChunkPublishRecord, Project
19 from documents.signals import post_publish
20 from documents.xml_tools import compile_text, split_xml
21 from cover.models import Image
22 from io import BytesIO
23 import os
24 import shutil
25 import re
26 from urllib.parse import urljoin
27
28
29 class Book(models.Model):
30     """ A document edited on the wiki """
31
    # Editable description of the document.
    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    # Non-public books are filtered out for anonymous users (see get_visible_for).
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Gallery directory name, joined under MEDIA_ROOT/IMAGE_DIR by gallery_path().
    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
    project = models.ForeignKey(Project, models.SET_NULL, null=True, blank=True)

    # Optional parent/child relation between books; neither field is editable in forms.
    parent = models.ForeignKey('self', models.SET_NULL, null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # The four underscore-prefixed fields below are rewritten by touch().
    _single = models.BooleanField(editable=False, null=True, db_index=True)
    _new_publishable = models.BooleanField(editable=False, null=True)
    _published = models.BooleanField(editable=False, null=True)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # dc_cover_image, dc and catalogue_book are filled in by refresh_dc_cache().
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    dc = models.JSONField(null=True, editable=False)
    # Generated cover file, written (or cleared) by build_cover().
    cover = models.FileField(blank=True, upload_to='documents/cover')
    # Link by slug to the catalogue app's Book; no database-level constraint
    # is created (db_constraint=False).
    catalogue_book = models.ForeignKey(
        'catalogue.Book',
        models.DO_NOTHING,
        to_field='slug',
        null=True, blank=True,
        db_constraint=False,
        editable=False, db_index=True,
        related_name='document_books',
        related_query_name='document_book',
    )
60
    class NoTextError(BaseException):
        """Raised when some chunk has no revision to build the text from.

        NOTE(review): this derives from BaseException, so handlers written
        as ``except Exception`` will not catch it -- confirm that is intended.
        """
        pass
63
    class Meta:
        # Model options: the model belongs to the `documents` app and
        # querysets are ordered by title, then slug, by default.
        app_label = 'documents'
        ordering = ['title', 'slug']
        verbose_name = _('book')
        verbose_name_plural = _('books')
69
70     @classmethod
71     def get_visible_for(cls, user):
72         qs = cls.objects.all()
73         if not user.is_authenticated:
74             qs = qs.filter(public=True)
75         return qs
76
77     @staticmethod
78     def q_dc(field, field_plural, value, prefix=''):
79         if connection.features.supports_json_field_contains:
80             return models.Q(**{f'{prefix}dc__{field_plural}__contains': value})
81         else:
82             return models.Q(**{f'{prefix}dc__{field}': value})
83             
84     
85     # Representing
86     # ============
87
88     def __iter__(self):
89         return iter(self.chunk_set.all())
90
91     def __getitem__(self, chunk):
92         return self.chunk_set.all()[chunk]
93
94     def __len__(self):
95         return self.chunk_set.count()
96
97     def __bool__(self):
98         """
99             Necessary so that __len__ isn't used for bool evaluation.
100         """
101         return True
102
103     def __str__(self):
104         return self.title
105
106     def get_absolute_url(self):
107         return reverse("documents_book", args=[self.slug])
108
109     def correct_about(self):
110         return "http://%s%s" % (
111             Site.objects.get_current().domain,
112             self.get_absolute_url()
113         )
114
115     def gallery_path(self):
116         return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.gallery)
117
118     def gallery_url(self):
119         return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, self.gallery)
120
121     # Creating & manipulating
122     # =======================
123
124     def accessible(self, request):
125         return self.public or request.user.is_authenticated
126
127     @classmethod
128     @transaction.atomic
129     def create(cls, creator, text, *args, **kwargs):
130         b = cls.objects.create(*args, **kwargs)
131         b.chunk_set.all().update(creator=creator)
132         b[0].commit(text, author=creator)
133         return b
134
135     def add(self, *args, **kwargs):
136         """Add a new chunk at the end."""
137         return self.chunk_set.reverse()[0].split(*args, **kwargs)
138
139     @classmethod
140     @transaction.atomic
141     def import_xml_text(cls, text=u'', previous_book=None,
142                 commit_args=None, **kwargs):
143         """Imports a book from XML, splitting it into chunks as necessary."""
144         texts = split_xml(text)
145         if previous_book:
146             instance = previous_book
147         else:
148             instance = cls(**kwargs)
149             instance.save()
150
151         # if there are more parts, set the rest to empty strings
152         book_len = len(instance)
153         for i in range(book_len - len(texts)):
154             texts.append((u'pusta część %d' % (i + 1), u''))
155
156         i = 0
157         for i, (title, text) in enumerate(texts):
158             if not title:
159                 title = u'część %d' % (i + 1)
160
161             slug = slugify(title)
162
163             if i < book_len:
164                 chunk = instance[i]
165                 chunk.slug = slug[:50]
166                 chunk.title = title[:255]
167                 chunk.save()
168             else:
169                 chunk = instance.add(slug, title)
170
171             chunk.commit(text, **commit_args)
172
173         return instance
174
175     def make_chunk_slug(self, proposed):
176         """ 
177             Finds a chunk slug not yet used in the book.
178         """
179         slugs = set(c.slug for c in self)
180         i = 1
181         new_slug = proposed[:50]
182         while new_slug in slugs:
183             new_slug = "%s_%d" % (proposed[:45], i)
184             i += 1
185         return new_slug
186
    @transaction.atomic
    def append(self, other, slugs=None, titles=None):
        """Move all chunks of another book to the end of this one.

        Each chunk of `other` is re-parented to `self` and numbered after
        this book's last chunk. The galleries are then merged with
        GalleryMerger and `other` is deleted. Runs in one transaction.

        `slugs` and `titles`, when given, must have one entry per chunk of
        `other`. If only `titles` is given, slugs are derived from them.

        NOTE(review): when `titles` is None the `slugs` argument is never
        read -- confirm whether slugs-without-titles should be supported.
        """
        assert self != other

        # Numbering continues after this book's current last chunk.
        number = self[len(self) - 1].number + 1
        len_other = len(other)
        single = len_other == 1

        if slugs is not None:
            assert len(slugs) == len_other
        if titles is not None:
            assert len(titles) == len_other
            if slugs is None:
                slugs = [slugify(t) for t in titles]

        for i, chunk in enumerate(other):
            # move chunk to new book
            chunk.book = self
            chunk.number = number

            if titles is None:
                # try some title guessing
                if other.title.startswith(self.title):
                    other_title_part = other.title[len(self.title):].lstrip(' /')
                else:
                    other_title_part = other.title

                if single:
                    # special treatment for appending one-parters:
                    # just use the guessed title and original book slug
                    chunk.title = other_title_part
                    if other.slug.startswith(self.slug):
                        chunk.slug = other.slug[len(self.slug):].lstrip('-_')
                    else:
                        chunk.slug = other.slug
                else:
                    chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
            else:
                chunk.slug = slugs[i]
                chunk.title = titles[i]

            # Make sure the slug does not clash with a chunk already in this book.
            chunk.slug = self.make_chunk_slug(chunk.slug)
            chunk.save()
            number += 1
        # Every chunk must have been moved before `other` is deleted.
        assert not other.chunk_set.exists()

        gm = GalleryMerger(self.gallery, other.gallery)
        # NOTE(review): self.gallery is assigned here, but this method does
        # not call self.save() -- verify the new value is persisted elsewhere.
        self.gallery = gm.merge()

        # and move the gallery starts
        if gm.was_merged:
                # Only the chunks that were just appended are shifted.
                for chunk in self[len(self) - len_other:]:
                        old_start = chunk.gallery_start or 1
                        chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
                        chunk.save()

        other.delete()
245
246
247     @transaction.atomic
248     def prepend_history(self, other):
249         """Prepend history from all the other book's chunks to own."""
250         assert self != other
251
252         for i in range(len(self), len(other)):
253             title = u"pusta część %d" % i
254             chunk = self.add(slugify(title), title)
255             chunk.commit('')
256
257         for i in range(len(other)):
258             self[i].prepend_history(other[0])
259
260         assert not other.chunk_set.exists()
261         other.delete()
262
263     def split(self):
264         """Splits all the chunks into separate books."""
265         self.title
266         for chunk in self:
267             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
268                     public=self.public, gallery=self.gallery)
269             book[0].delete()
270             chunk.book = book
271             chunk.number = 1
272             chunk.save()
273         assert not self.chunk_set.exists()
274         self.delete()
275
276     # State & cache
277     # =============
278
279     def last_published(self):
280         try:
281             return self.publish_log.all()[0].timestamp
282         except IndexError:
283             return None
284
285     def assert_publishable(self):
286         assert self.chunk_set.exists(), _('No chunks in the book.')
287         try:
288             changes = self.get_current_changes(publishable=True)
289         except self.NoTextError:
290             raise AssertionError(_('Not all chunks have approved revisions.'))
291
292         from librarian import NoDublinCore, ParseError, ValidationError
293
294         try:
295             bi = self.wldocument(changes=changes, strict=True).book_info
296         except ParseError as e:
297             raise AssertionError(_('Invalid XML') + ': ' + str(e))
298         except NoDublinCore:
299             raise AssertionError(_('No Dublin Core found.'))
300         except ValidationError as e:
301             raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e))
302
303         valid_about = self.correct_about()
304         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
305
306     def publishable_error(self):
307         try:
308             return self.assert_publishable()
309         except AssertionError as e:
310             return e
311         else:
312             return None
313
314     def hidden(self):
315         return self.slug.startswith('.')
316
317     def is_new_publishable(self):
318         """Checks if book is ready for publishing.
319
320         Returns True if there is a publishable version newer than the one
321         already published.
322
323         """
324         new_publishable = False
325         if not self.chunk_set.exists():
326             return False
327         for chunk in self:
328             change = chunk.publishable()
329             if not change:
330                 return False
331             if not new_publishable and not change.publish_log.exists():
332                 new_publishable = True
333         return new_publishable
334     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
335
336     def is_published(self):
337         return self.publish_log.exists()
338     published = cached_in_field('_published')(is_published)
339
340     def get_on_track(self):
341         if self.published:
342             return -1
343         stages = [ch.stage.ordering if ch.stage is not None else 0
344                     for ch in self]
345         if not len(stages):
346             return 0
347         return min(stages)
348     on_track = cached_in_field('_on_track')(get_on_track)
349
350     def is_single(self):
351         return len(self) == 1
352     single = cached_in_field('_single')(is_single)
353
354     def book_info(self, publishable=True):
355         try:
356             book_xml = self.materialize(publishable=publishable)
357         except self.NoTextError:
358             pass
359         else:
360             from librarian.dcparser import BookInfo
361             from librarian import NoDublinCore, ParseError, ValidationError
362             try:
363                 return BookInfo.from_bytes(book_xml.encode('utf-8'))
364             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
365                 return None
366
367     def refresh_dc_cache(self):
368         update = {
369             'catalogue_book_id': None,
370             'dc_cover_image': None,
371         }
372
373         info = self.book_info()
374         if info is not None:
375             update['catalogue_book_id'] = info.url.slug
376             if info.cover_source:
377                 try:
378                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
379                 except:
380                     pass
381                 else:
382                     if info.cover_source == image.get_full_url():
383                         update['dc_cover_image'] = image
384             update['dc'] = info.to_dict()
385         Book.objects.filter(pk=self.pk).update(**update)
386
387     def touch(self):
388         update = {
389             "_new_publishable": self.is_new_publishable(),
390             "_published": self.is_published(),
391             "_single": self.is_single(),
392             "_on_track": self.get_on_track(),
393         }
394         Book.objects.filter(pk=self.pk).update(**update)
395         self.refresh_dc_cache()
396         self.build_cover()
397
398     def build_cover(self):
399         width, height = 212, 300
400         try:
401             xml = self.materialize(publishable=True).encode('utf-8')
402             info = BookInfo.from_bytes(xml)
403             kwargs = {}
404             if chunk.book.project is not None:
405                 if chunk.book.project.logo_mono or chunk.book.project.logo:
406                     kwargs['cover_logo'] = (chunk.book.project.logo_mono or chunk.book.project.logo).path
407             cover = make_cover(info, width=width, height=height, **kwargs)
408             out = BytesIO()
409             ext = cover.ext()
410             cover.save(out)
411             self.cover.save(f'{self.slug}.{ext}', out, save=False)
412             type(self).objects.filter(pk=self.pk).update(cover=self.cover)
413         except:
414             type(self).objects.filter(pk=self.pk).update(cover='')
415
416     # Materializing & publishing
417     # ==========================
418
419     def get_current_changes(self, publishable=True):
420         """
421             Returns a list containing one Change for every Chunk in the Book.
422             Takes the most recent revision (publishable, if set).
423             Throws an error, if a proper revision is unavailable for a Chunk.
424         """
425         if publishable:
426             changes = [chunk.publishable() for chunk in self]
427         else:
428             changes = [chunk.head for chunk in self if chunk.head is not None]
429         if None in changes:
430             raise self.NoTextError('Some chunks have no available text.')
431         return changes
432
433     def materialize(self, publishable=False, changes=None):
434         """ 
435             Get full text of the document compiled from chunks.
436             Takes the current versions of all texts
437             or versions most recently tagged for publishing,
438             or a specified iterable changes.
439         """
440         if changes is None:
441             changes = self.get_current_changes(publishable)
442         return compile_text(change.materialize() for change in changes)
443
444     def wldocument(self, publishable=True, changes=None, 
445                    parse_dublincore=True, strict=False, librarian2=False):
446         from documents.ebook_utils import RedakcjaDocProvider
447         from librarian.parser import WLDocument
448         from librarian.document import WLDocument as WLDocument2
449
450         provider = RedakcjaDocProvider(publishable=publishable)
451         xml = self.materialize(publishable=publishable, changes=changes).encode('utf-8')
452         
453         if librarian2:
454             return WLDocument2(
455                 BytesIO(xml),
456                 provider=provider)
457         return WLDocument.from_bytes(
458                 xml,
459                 provider=provider,
460                 parse_dublincore=parse_dublincore,
461                 strict=strict)
462
463     def publish(self, user, fake=False, host=None, days=0, beta=False, hidden=False):
464         """
465             Publishes a book on behalf of a (local) user.
466         """
467         self.assert_publishable()
468         changes = self.get_current_changes(publishable=True)
469         if not fake:
470             book_xml = self.materialize(changes=changes)
471             data = {"book_xml": book_xml, "days": days, "hidden": hidden}
472             if self.project is not None:
473                 if self.project.logo:
474                     data['logo'] = urljoin(
475                         'https://' + Site.objects.get_current().domain,
476                         self.project.logo.url,
477                     )
478                 if self.project.logo_mono:
479                     data['logo_mono'] = urljoin(
480                         'https://' + Site.objects.get_current().domain,
481                         self.project.logo_mono.url,
482                     )
483                 if self.project.logo_alt:
484                     data['logo_alt'] = self.project.logo_alt
485             if host:
486                 data['gallery_url'] = host + self.gallery_url()
487             apiclient.api_call(user, "books/", data, beta=beta)
488         if not beta:
489             # record the publish
490             br = BookPublishRecord.objects.create(book=self, user=user)
491             for c in changes:
492                 ChunkPublishRecord.objects.create(book_record=br, change=c)
493             if not self.public and days == 0:
494                 self.public = True
495                 self.save()
496             if self.public and days > 0:
497                 self.public = False
498                 self.save()
499             post_publish.send(sender=br)
500
501     def latex_dir(self):
502         doc = self.wldocument()
503         return doc.latex_dir(cover=True, ilustr_path=self.gallery_path())