images in epub
[redakcja.git] / apps / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from django.contrib.auth.models import User
7 from django.contrib.sites.models import Site
8 from django.db import models, transaction
9 from django.template.loader import render_to_string
10 from django.utils.translation import ugettext_lazy as _
11 from django.conf import settings
12 from slughifi import slughifi
13
14 import apiclient
15 from catalogue.helpers import cached_in_field, GalleryMerger
16 from catalogue.models import BookPublishRecord, ChunkPublishRecord, Project
17 from catalogue.signals import post_publish
18 from catalogue.tasks import refresh_instance, book_content_updated
19 from catalogue.xml_tools import compile_text, split_xml
20 from cover.models import Image
21 from organizations.models import Organization
22 import os
23 import shutil
24 import re
25
26
class Book(models.Model):
    """A document edited on the wiki.

    A book's text lives in its chunks: iteration, indexing and ``len()``
    on a Book all delegate to ``self.chunk_set``.
    """

    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    # Non-public books are only accessible to authenticated users
    # (see accessible()).
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Gallery identifier; handed to GalleryMerger when books are appended.
    gallery = models.CharField(u'materiały', max_length=255, blank=True)
    project = models.ForeignKey(Project, null=True, blank=True)

    # Ownership: both a user and an organization reference are nullable.
    owner_user = models.ForeignKey(User, null=True)
    owner_organization = models.ForeignKey(Organization, null=True)

    #wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False)
    # Optional self-reference to a parent book (reverse accessor: "children").
    parent = models.ForeignKey('self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    # Cache
    # These columns back the cached_in_field() wrappers defined further down
    # and are rewritten in bulk by touch(); _short_html is reset to None there.
    _short_html = models.TextField(null=True, blank=True, editable=False)
    _single = models.NullBooleanField(editable=False, db_index=True)
    _new_publishable = models.NullBooleanField(editable=False)
    _published = models.NullBooleanField(editable=False)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # Dublin Core derived values, written by refresh_dc_cache().
    dc_cover_image = models.ForeignKey(Image, blank=True, null=True,
        db_index=True, on_delete=models.SET_NULL, editable=False)
    dc_slug = models.CharField(max_length=128, null=True, blank=True,
            editable=False, db_index=True)
53
54     class NoTextError(BaseException):
55         pass
56
57     class Meta:
58         app_label = 'catalogue'
59         ordering = ['title', 'slug']
60         verbose_name = u'moduł'
61         verbose_name_plural = u'moduły'
62
63
64     # Representing
65     # ============
66
67     def __iter__(self):
68         return iter(self.chunk_set.all())
69
70     def __getitem__(self, chunk):
71         return self.chunk_set.all()[chunk]
72
73     def __len__(self):
74         return self.chunk_set.count()
75
76     def __nonzero__(self):
77         """
78             Necessary so that __len__ isn't used for bool evaluation.
79         """
80         return True
81
82     def __unicode__(self):
83         return self.title
84
85     @models.permalink
86     def get_absolute_url(self):
87         return ("catalogue_book", [self.slug])
88
89     def correct_about(self):
90         return "http://%s%s" % (
91             Site.objects.get_current().domain,
92             self.get_absolute_url()
93         )
94
95     # Creating & manipulating
96     # =======================
97
98     def accessible(self, request):
99         return self.public or request.user.is_authenticated()
100
101     @classmethod
102     @transaction.commit_on_success
103     def create(cls, creator, text, *args, **kwargs):
104         b = cls.objects.create(*args, **kwargs)
105         b.chunk_set.all().update(creator=creator)
106         b[0].commit(text, author=creator)
107         return b
108
109     def add(self, *args, **kwargs):
110         """Add a new chunk at the end."""
111         return self.chunk_set.reverse()[0].split(*args, **kwargs)
112
113     @classmethod
114     @transaction.commit_on_success
115     def import_xml_text(cls, text=u'', previous_book=None,
116                 commit_args=None, **kwargs):
117         """Imports a book from XML, splitting it into chunks as necessary."""
118         texts = split_xml(text)
119         if previous_book:
120             instance = previous_book
121         else:
122             instance = cls(**kwargs)
123             instance.save()
124
125         # if there are more parts, set the rest to empty strings
126         book_len = len(instance)
127         for i in range(book_len - len(texts)):
128             texts.append((u'pusta część %d' % (i + 1), u''))
129
130         i = 0
131         for i, (title, text) in enumerate(texts):
132             if not title:
133                 title = u'część %d' % (i + 1)
134
135             slug = slughifi(title)
136
137             if i < book_len:
138                 chunk = instance[i]
139                 chunk.slug = slug[:50]
140                 chunk.title = title[:255]
141                 chunk.save()
142             else:
143                 chunk = instance.add(slug, title)
144
145             chunk.commit(text, **commit_args)
146
147         return instance
148
149     def make_chunk_slug(self, proposed):
150         """ 
151             Finds a chunk slug not yet used in the book.
152         """
153         slugs = set(c.slug for c in self)
154         i = 1
155         new_slug = proposed[:50]
156         while new_slug in slugs:
157             new_slug = "%s_%d" % (proposed[:45], i)
158             i += 1
159         return new_slug
160
    @transaction.commit_on_success
    def append(self, other, slugs=None, titles=None):
        """Add all chunks of another book to self, then delete that book.

        Arguments:
            other: the book whose chunks are moved; must differ from self.
            slugs: optional sequence with one slug per chunk of ``other``.
            titles: optional sequence with one title per chunk of ``other``.
                When given without ``slugs``, slugs are derived from the
                titles with slughifi().

        When ``titles`` is None, chunk titles are guessed from the two
        books' titles. Every resulting slug is passed through
        make_chunk_slug() so it is unique within this book.
        """
        assert self != other

        # Continue numbering after this book's last chunk.
        number = self[len(self) - 1].number + 1
        len_other = len(other)
        single = len_other == 1

        if slugs is not None:
            assert len(slugs) == len_other
        if titles is not None:
            assert len(titles) == len_other
            if slugs is None:
                slugs = [slughifi(t) for t in titles]

        for i, chunk in enumerate(other):
            # move chunk to new book
            chunk.book = self
            chunk.number = number

            if titles is None:
                # NOTE(review): in this branch the ``slugs`` argument is
                # never consulted, even when supplied -- confirm intended.
                # try some title guessing
                if other.title.startswith(self.title):
                    other_title_part = other.title[len(self.title):].lstrip(' /')
                else:
                    other_title_part = other.title

                if single:
                    # special treatment for appending one-parters:
                    # just use the guessed title and original book slug
                    chunk.title = other_title_part
                    if other.slug.startswith(self.slug):
                        chunk.slug = other.slug[len(self.slug):].lstrip('-_')
                    else:
                        chunk.slug = other.slug
                else:
                    chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
            else:
                chunk.slug = slugs[i]
                chunk.title = titles[i]

            chunk.slug = self.make_chunk_slug(chunk.slug)
            chunk.save()
            number += 1
        # Every chunk should have been re-parented by now.
        assert not other.chunk_set.exists()

        gm = GalleryMerger(self.gallery, other.gallery)
        # NOTE(review): self.gallery is reassigned but self.save() is not
        # called in this method -- presumably the caller or merge() persists
        # it; confirm.
        self.gallery = gm.merge()

        # and move the gallery starts
        if gm.was_merged:
                # Only the chunks that were just moved (the last len_other).
                for chunk in self[len(self) - len_other:]:
                        old_start = chunk.gallery_start or 1
                        chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
                        chunk.save()

        other.delete()
219
220
    @transaction.commit_on_success
    def prepend_history(self, other):
        """Prepend history from all the other book's chunks to own.

        ``other`` must be a different book; it is deleted at the end.
        """
        assert self != other

        # Pad this book with empty chunks so it has at least as many
        # chunks as ``other``.
        for i in range(len(self), len(other)):
            title = u"pusta część %d" % i
            chunk = self.add(slughifi(title), title)
            chunk.commit('')

        # len(other) is evaluated once, before the loop starts.
        # NOTE(review): other[0] is used on every pass; this presumably
        # relies on the chunk-level prepend_history() removing that chunk
        # from ``other`` (the assertion below expects none to remain) --
        # confirm.
        for i in range(len(other)):
            self[i].prepend_history(other[0])

        assert not other.chunk_set.exists()
        other.delete()
236
237     def split(self):
238         """Splits all the chunks into separate books."""
239         self.title
240         for chunk in self:
241             book = Book.objects.create(title=chunk.title, slug=chunk.slug,
242                     public=self.public, gallery=self.gallery)
243             book[0].delete()
244             chunk.book = book
245             chunk.number = 1
246             chunk.save()
247         assert not self.chunk_set.exists()
248         self.delete()
249
250     # State & cache
251     # =============
252
253     def last_published(self):
254         try:
255             return self.publish_log.all()[0].timestamp
256         except IndexError:
257             return None
258
259     def assert_publishable(self):
260         assert self.chunk_set.exists(), _('No chunks in the book.')
261         try:
262             changes = self.get_current_changes(publishable=True)
263         except self.NoTextError:
264             raise AssertionError(_('Not all chunks have publishable revisions.'))
265
266         from librarian import NoDublinCore, ParseError, ValidationError
267
268         try:
269             bi = self.wldocument(changes=changes, strict=True).book_info
270         except ParseError, e:
271             raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
272         except NoDublinCore:
273             raise AssertionError(_('No Dublin Core found.'))
274         except ValidationError, e:
275             raise AssertionError(_('Invalid Dublin Core') + ': ' + unicode(e))
276
277         valid_about = self.correct_about()
278         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
279
280     def publishable_error(self):
281         try:
282             return self.assert_publishable()
283         except AssertionError, e:
284             return e
285         else:
286             return None
287
288     def hidden(self):
289         return self.slug.startswith('.')
290
291     def is_new_publishable(self):
292         """Checks if book is ready for publishing.
293
294         Returns True if there is a publishable version newer than the one
295         already published.
296
297         """
298         new_publishable = False
299         if not self.chunk_set.exists():
300             return False
301         for chunk in self:
302             change = chunk.publishable()
303             if not change:
304                 return False
305             if not new_publishable and not change.publish_log.exists():
306                 new_publishable = True
307         return new_publishable
308     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
309
310     def is_published(self):
311         return self.publish_log.exists()
312     published = cached_in_field('_published')(is_published)
313
314     def get_on_track(self):
315         if self.published:
316             return -1
317         stages = [ch.stage.ordering if ch.stage is not None else 0
318                     for ch in self]
319         if not len(stages):
320             return 0
321         return min(stages)
322     on_track = cached_in_field('_on_track')(get_on_track)
323
324     def is_single(self):
325         return len(self) == 1
326     single = cached_in_field('_single')(is_single)
327
328     #@cached_in_field('_short_html')
329     def short_html(self):
330         return render_to_string('catalogue/book_list/book.html', {'book': self})
331
332     def book_info(self, publishable=True):
333         try:
334             book_xml = self.materialize(publishable=publishable)
335         except self.NoTextError:
336             pass
337         else:
338             from librarian.dcparser import BookInfo
339             from librarian import NoDublinCore, ParseError, ValidationError
340             try:
341                 return BookInfo.from_string(book_xml.encode('utf-8'))
342             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
343                 return None
344
345     def refresh_dc_cache(self):
346         update = {
347             'dc_slug': None,
348             'dc_cover_image': None,
349         }
350
351         info = self.book_info()
352         if info is not None:
353             update['dc_slug'] = info.url.slug
354             if info.cover_source:
355                 try:
356                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
357                 except:
358                     pass
359                 else:
360                     if info.cover_source == image.get_full_url():
361                         update['dc_cover_image'] = image
362         Book.objects.filter(pk=self.pk).update(**update)
363
364     def touch(self):
365         # this should only really be done when text or publishable status changes
366         book_content_updated.delay(self)
367
368         update = {
369             "_new_publishable": self.is_new_publishable(),
370             "_published": self.is_published(),
371             "_single": self.is_single(),
372             "_on_track": self.get_on_track(),
373             "_short_html": None,
374         }
375         Book.objects.filter(pk=self.pk).update(**update)
376         refresh_instance(self)
377
    def refresh(self):
        """This should be done offline.

        Touches the cached attributes one by one.
        NOTE(review): this presumably makes the cached_in_field() wrappers
        compute and store their values on access -- confirm against
        catalogue.helpers.
        """
        # NOTE(review): short_html is a plain method here (its
        # cached_in_field decorator is commented out), so this line only
        # looks the method up without calling it.
        self.short_html
        self.single
        self.new_publishable
        self.published
384
385     # Materializing & publishing
386     # ==========================
387
388     def get_current_changes(self, publishable=True):
389         """
390             Returns a list containing one Change for every Chunk in the Book.
391             Takes the most recent revision (publishable, if set).
392             Throws an error, if a proper revision is unavailable for a Chunk.
393         """
394         if publishable:
395             changes = [chunk.publishable() for chunk in self]
396         else:
397             changes = [chunk.head for chunk in self if chunk.head is not None]
398         if None in changes:
399             raise self.NoTextError('Some chunks have no available text.')
400         return changes
401
402     def materialize(self, publishable=False, changes=None):
403         """ 
404             Get full text of the document compiled from chunks.
405             Takes the current versions of all texts
406             or versions most recently tagged for publishing,
407             or a specified iterable changes.
408         """
409         if changes is None:
410             changes = self.get_current_changes(publishable)
411         return compile_text(change.materialize() for change in changes)
412
413     def wldocument(self, publishable=True, changes=None, 
414             parse_dublincore=True, strict=False):
415         from catalogue.ebook_utils import RedakcjaDocProvider
416         from librarian.parser import WLDocument
417
418         return WLDocument.from_string(
419                 self.materialize(publishable=publishable, changes=changes),
420                 provider=RedakcjaDocProvider(publishable=publishable),
421                 parse_dublincore=parse_dublincore,
422                 strict=strict)
423
424     def publish(self, user):
425         """
426             Publishes a book on behalf of a (local) user.
427         """
428         self.assert_publishable()
429         changes = self.get_current_changes(publishable=True)
430         book_xml = self.materialize(changes=changes)
431         apiclient.api_call(user, "books/", {"book_xml": book_xml})
432         # record the publish
433         br = BookPublishRecord.objects.create(book=self, user=user)
434         for c in changes:
435             ChunkPublishRecord.objects.create(book_record=br, change=c)
436         post_publish.send(sender=br)