e649180da27fbe5720c96f1be73e669142bd4158
[redakcja.git] / src / documents / models / book.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from django.apps import apps
5 from django.contrib.sites.models import Site
6 from django.db import connection, models, transaction
7 from django.template.loader import render_to_string
8 from django.urls import reverse
9 from django.utils.translation import ugettext_lazy as _
10 from django.conf import settings
11 from slugify import slugify
12
13
14 import apiclient
15 from documents.helpers import cached_in_field, GalleryMerger
16 from documents.models import BookPublishRecord, ChunkPublishRecord, Project
17 from documents.signals import post_publish
18 from documents.xml_tools import compile_text, split_xml
19 from cover.models import Image
20 from io import BytesIO
21 import os
22 import shutil
23 import re
24
class Book(models.Model):
    """A document edited on the wiki.

    A book owns a set of chunks (``chunk_set``); iterating, indexing and
    ``len()`` on a book all operate on those chunks. Several fields below
    are caches that ``touch()`` and ``refresh_dc_cache()`` recompute.
    """

    # Basic, user-editable description of the book.
    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    # Non-public books are hidden from unauthenticated users
    # (see get_visible_for() and accessible()).
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Directory name of the scan gallery, relative to MEDIA_ROOT/IMAGE_DIR.
    gallery = models.CharField(_('scan gallery name'), max_length=255, blank=True)
    project = models.ForeignKey(Project, models.SET_NULL, null=True, blank=True)

    # Optional parent book and this book's number under it; not editable in forms.
    parent = models.ForeignKey('self', models.SET_NULL, null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # The four underscore-prefixed fields are written by touch().
    _single = models.BooleanField(editable=False, null=True, db_index=True)
    _new_publishable = models.BooleanField(editable=False, null=True)
    _published = models.BooleanField(editable=False, null=True)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # The remaining cache fields are written by refresh_dc_cache() from the
    # book's Dublin Core metadata.
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    dc = models.JSONField(null=True, editable=False)
    # Reference by slug to the catalogue entry; no database-level constraint
    # is created (db_constraint=False) and deletions are not propagated.
    catalogue_book = models.ForeignKey(
        'catalogue.Book',
        models.DO_NOTHING,
        to_field='slug',
        null=True, blank=True,
        db_constraint=False,
        editable=False, db_index=True,
        related_name='document_books',
        related_query_name='document_book',
    )
55
56     class NoTextError(BaseException):
57         pass
58
59     class Meta:
60         app_label = 'documents'
61         ordering = ['title', 'slug']
62         verbose_name = _('book')
63         verbose_name_plural = _('books')
64
65     @classmethod
66     def get_visible_for(cls, user):
67         qs = cls.objects.all()
68         if not user.is_authenticated:
69             qs = qs.filter(public=True)
70         return qs
71
72     @staticmethod
73     def q_dc(field, field_plural, value, prefix=''):
74         if connection.features.supports_json_field_contains:
75             return models.Q(**{f'{prefix}dc__{field_plural}__contains': value})
76         else:
77             return models.Q(**{f'{prefix}dc__{field}': value})
78             
79     
80     # Representing
81     # ============
82
83     def __iter__(self):
84         return iter(self.chunk_set.all())
85
86     def __getitem__(self, chunk):
87         return self.chunk_set.all()[chunk]
88
89     def __len__(self):
90         return self.chunk_set.count()
91
92     def __bool__(self):
93         """
94             Necessary so that __len__ isn't used for bool evaluation.
95         """
96         return True
97
98     def __str__(self):
99         return self.title
100
101     def get_absolute_url(self):
102         return reverse("documents_book", args=[self.slug])
103
104     def correct_about(self):
105         return "http://%s%s" % (
106             Site.objects.get_current().domain,
107             self.get_absolute_url()
108         )
109
110     def gallery_path(self):
111         return os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.gallery)
112
113     def gallery_url(self):
114         return '%s%s%s/' % (settings.MEDIA_URL, settings.IMAGE_DIR, self.gallery)
115
116     # Creating & manipulating
117     # =======================
118
119     def accessible(self, request):
120         return self.public or request.user.is_authenticated
121
122     @classmethod
123     @transaction.atomic
124     def create(cls, creator, text, *args, **kwargs):
125         b = cls.objects.create(*args, **kwargs)
126         b.chunk_set.all().update(creator=creator)
127         b[0].commit(text, author=creator)
128         return b
129
130     def add(self, *args, **kwargs):
131         """Add a new chunk at the end."""
132         return self.chunk_set.reverse()[0].split(*args, **kwargs)
133
134     @classmethod
135     @transaction.atomic
136     def import_xml_text(cls, text=u'', previous_book=None,
137                 commit_args=None, **kwargs):
138         """Imports a book from XML, splitting it into chunks as necessary."""
139         texts = split_xml(text)
140         if previous_book:
141             instance = previous_book
142         else:
143             instance = cls(**kwargs)
144             instance.save()
145
146         # if there are more parts, set the rest to empty strings
147         book_len = len(instance)
148         for i in range(book_len - len(texts)):
149             texts.append((u'pusta część %d' % (i + 1), u''))
150
151         i = 0
152         for i, (title, text) in enumerate(texts):
153             if not title:
154                 title = u'część %d' % (i + 1)
155
156             slug = slugify(title)
157
158             if i < book_len:
159                 chunk = instance[i]
160                 chunk.slug = slug[:50]
161                 chunk.title = title[:255]
162                 chunk.save()
163             else:
164                 chunk = instance.add(slug, title)
165
166             chunk.commit(text, **commit_args)
167
168         return instance
169
170     def make_chunk_slug(self, proposed):
171         """ 
172             Finds a chunk slug not yet used in the book.
173         """
174         slugs = set(c.slug for c in self)
175         i = 1
176         new_slug = proposed[:50]
177         while new_slug in slugs:
178             new_slug = "%s_%d" % (proposed[:45], i)
179             i += 1
180         return new_slug
181
182     @transaction.atomic
183     def append(self, other, slugs=None, titles=None):
184         """Add all chunks of another book to self."""
185         assert self != other
186
187         number = self[len(self) - 1].number + 1
188         len_other = len(other)
189         single = len_other == 1
190
191         if slugs is not None:
192             assert len(slugs) == len_other
193         if titles is not None:
194             assert len(titles) == len_other
195             if slugs is None:
196                 slugs = [slugify(t) for t in titles]
197
198         for i, chunk in enumerate(other):
199             # move chunk to new book
200             chunk.book = self
201             chunk.number = number
202
203             if titles is None:
204                 # try some title guessing
205                 if other.title.startswith(self.title):
206                     other_title_part = other.title[len(self.title):].lstrip(' /')
207                 else:
208                     other_title_part = other.title
209
210                 if single:
211                     # special treatment for appending one-parters:
212                     # just use the guessed title and original book slug
213                     chunk.title = other_title_part
214                     if other.slug.startswith(self.slug):
215                         chunk.slug = other.slug[len(self.slug):].lstrip('-_')
216                     else:
217                         chunk.slug = other.slug
218                 else:
219                     chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
220             else:
221                 chunk.slug = slugs[i]
222                 chunk.title = titles[i]
223
224             chunk.slug = self.make_chunk_slug(chunk.slug)
225             chunk.save()
226             number += 1
227         assert not other.chunk_set.exists()
228
229         gm = GalleryMerger(self.gallery, other.gallery)
230         self.gallery = gm.merge()
231
232         # and move the gallery starts
233         if gm.was_merged:
234                 for chunk in self[len(self) - len_other:]:
235                         old_start = chunk.gallery_start or 1
236                         chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
237                         chunk.save()
238
239         other.delete()
240
241
242     @transaction.atomic
243     def prepend_history(self, other):
244         """Prepend history from all the other book's chunks to own."""
245         assert self != other
246
247         for i in range(len(self), len(other)):
248             title = u"pusta część %d" % i
249             chunk = self.add(slugify(title), title)
250             chunk.commit('')
251
252         for i in range(len(other)):
253             self[i].prepend_history(other[0])
254
255         assert not other.chunk_set.exists()
256         other.delete()
257
258     def split(self):
259         """Splits all the chunks into separate books."""
260         self.title
261         for chunk in self:
262             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
263                     public=self.public, gallery=self.gallery)
264             book[0].delete()
265             chunk.book = book
266             chunk.number = 1
267             chunk.save()
268         assert not self.chunk_set.exists()
269         self.delete()
270
271     # State & cache
272     # =============
273
274     def last_published(self):
275         try:
276             return self.publish_log.all()[0].timestamp
277         except IndexError:
278             return None
279
280     def assert_publishable(self):
281         assert self.chunk_set.exists(), _('No chunks in the book.')
282         try:
283             changes = self.get_current_changes(publishable=True)
284         except self.NoTextError:
285             raise AssertionError(_('Not all chunks have publishable revisions.'))
286
287         from librarian import NoDublinCore, ParseError, ValidationError
288
289         try:
290             bi = self.wldocument(changes=changes, strict=True).book_info
291         except ParseError as e:
292             raise AssertionError(_('Invalid XML') + ': ' + str(e))
293         except NoDublinCore:
294             raise AssertionError(_('No Dublin Core found.'))
295         except ValidationError as e:
296             raise AssertionError(_('Invalid Dublin Core') + ': ' + str(e))
297
298         valid_about = self.correct_about()
299         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
300
301     def publishable_error(self):
302         try:
303             return self.assert_publishable()
304         except AssertionError as e:
305             return e
306         else:
307             return None
308
309     def hidden(self):
310         return self.slug.startswith('.')
311
312     def is_new_publishable(self):
313         """Checks if book is ready for publishing.
314
315         Returns True if there is a publishable version newer than the one
316         already published.
317
318         """
319         new_publishable = False
320         if not self.chunk_set.exists():
321             return False
322         for chunk in self:
323             change = chunk.publishable()
324             if not change:
325                 return False
326             if not new_publishable and not change.publish_log.exists():
327                 new_publishable = True
328         return new_publishable
329     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
330
331     def is_published(self):
332         return self.publish_log.exists()
333     published = cached_in_field('_published')(is_published)
334
335     def get_on_track(self):
336         if self.published:
337             return -1
338         stages = [ch.stage.ordering if ch.stage is not None else 0
339                     for ch in self]
340         if not len(stages):
341             return 0
342         return min(stages)
343     on_track = cached_in_field('_on_track')(get_on_track)
344
345     def is_single(self):
346         return len(self) == 1
347     single = cached_in_field('_single')(is_single)
348
349     def book_info(self, publishable=True):
350         try:
351             book_xml = self.materialize(publishable=publishable)
352         except self.NoTextError:
353             pass
354         else:
355             from librarian.dcparser import BookInfo
356             from librarian import NoDublinCore, ParseError, ValidationError
357             try:
358                 return BookInfo.from_bytes(book_xml.encode('utf-8'))
359             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
360                 return None
361
362     def refresh_dc_cache(self):
363         update = {
364             'catalogue_book_id': None,
365             'dc_cover_image': None,
366         }
367
368         info = self.book_info()
369         if info is not None:
370             update['catalogue_book_id'] = info.url.slug
371             if info.cover_source:
372                 try:
373                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
374                 except:
375                     pass
376                 else:
377                     if info.cover_source == image.get_full_url():
378                         update['dc_cover_image'] = image
379             update['dc'] = info.to_dict()
380         Book.objects.filter(pk=self.pk).update(**update)
381
382     def touch(self):
383         update = {
384             "_new_publishable": self.is_new_publishable(),
385             "_published": self.is_published(),
386             "_single": self.is_single(),
387             "_on_track": self.get_on_track(),
388         }
389         Book.objects.filter(pk=self.pk).update(**update)
390         self.refresh_dc_cache()
391
392     # Materializing & publishing
393     # ==========================
394
395     def get_current_changes(self, publishable=True):
396         """
397             Returns a list containing one Change for every Chunk in the Book.
398             Takes the most recent revision (publishable, if set).
399             Throws an error, if a proper revision is unavailable for a Chunk.
400         """
401         if publishable:
402             changes = [chunk.publishable() for chunk in self]
403         else:
404             changes = [chunk.head for chunk in self if chunk.head is not None]
405         if None in changes:
406             raise self.NoTextError('Some chunks have no available text.')
407         return changes
408
409     def materialize(self, publishable=False, changes=None):
410         """ 
411             Get full text of the document compiled from chunks.
412             Takes the current versions of all texts
413             or versions most recently tagged for publishing,
414             or a specified iterable changes.
415         """
416         if changes is None:
417             changes = self.get_current_changes(publishable)
418         return compile_text(change.materialize() for change in changes)
419
420     def wldocument(self, publishable=True, changes=None, 
421                    parse_dublincore=True, strict=False, librarian2=False):
422         from documents.ebook_utils import RedakcjaDocProvider
423         from librarian.parser import WLDocument
424         from librarian.document import WLDocument as WLDocument2
425
426         provider = RedakcjaDocProvider(publishable=publishable)
427         xml = self.materialize(publishable=publishable, changes=changes).encode('utf-8')
428         
429         if librarian2:
430             return WLDocument2(
431                 BytesIO(xml),
432                 provider=provider)
433         return WLDocument.from_bytes(
434                 xml,
435                 provider=provider,
436                 parse_dublincore=parse_dublincore,
437                 strict=strict)
438
439     def publish(self, user, fake=False, host=None, days=0, beta=False, hidden=False):
440         """
441             Publishes a book on behalf of a (local) user.
442         """
443         self.assert_publishable()
444         changes = self.get_current_changes(publishable=True)
445         if not fake:
446             book_xml = self.materialize(changes=changes)
447             data = {"book_xml": book_xml, "days": days, "hidden": hidden}
448             if host:
449                 data['gallery_url'] = host + self.gallery_url()
450             apiclient.api_call(user, "books/", data, beta=beta)
451         if not beta:
452             # record the publish
453             br = BookPublishRecord.objects.create(book=self, user=user)
454             for c in changes:
455                 ChunkPublishRecord.objects.create(book_record=br, change=c)
456             if not self.public and days == 0:
457                 self.public = True
458                 self.save()
459             if self.public and days > 0:
460                 self.public = False
461                 self.save()
462             post_publish.send(sender=br)
463
464     def latex_dir(self):
465         doc = self.wldocument()
466         return doc.latex_dir(cover=True, ilustr_path=self.gallery_path())