minor cleanup
[redakcja.git] / apps / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from django.contrib.sites.models import Site
7 from django.db import models, transaction
8 from django.template.loader import render_to_string
9 from django.utils.translation import ugettext_lazy as _
10 from slughifi import slughifi
11
12 import apiclient
13 from catalogue.helpers import cached_in_field, GalleryMerger
14 from catalogue.models import BookPublishRecord, ChunkPublishRecord, Project
15 from catalogue.signals import post_publish
16 from catalogue.tasks import refresh_instance, book_content_updated
17 from catalogue.xml_tools import compile_text, split_xml
18 from cover.models import Image
19
20
21 class Book(models.Model):
22     """ A document edited on the wiki """
23
    # Basic descriptive data. `slug` is unique and is what get_absolute_url()
    # passes to the URL resolver.
    title = models.CharField(_('title'), max_length=255, db_index=True)
    slug = models.SlugField(_('slug'), max_length=128, unique=True, db_index=True)
    public = models.BooleanField(_('public'), default=True, db_index=True)
    # Gallery name; publish() joins it with MEDIA_ROOT/IMAGE_DIR via the slug.
    # The verbose name is Polish for "materials".
    gallery = models.CharField(u'materiały', max_length=255, blank=True)
    project = models.ForeignKey(Project, null=True, blank=True)

    # wl_slug = models.CharField(_('title'), max_length=255, null=True, db_index=True, editable=False)
    # Optional self-referencing hierarchy; children are reachable as `book.children`.
    parent = models.ForeignKey(
        'self', null=True, blank=True, verbose_name=_('parent'), related_name="children", editable=False)
    parent_number = models.IntegerField(_('parent number'), null=True, blank=True, db_index=True, editable=False)

    for_cybernauts = models.BooleanField(_('for Cybernauts'), default=False)

    # Cache
    # The underscore-prefixed columns are written by touch() and read through
    # the cached_in_field() wrappers defined further down in this class.
    _short_html = models.TextField(null=True, blank=True, editable=False)
    _single = models.NullBooleanField(editable=False, db_index=True)
    _new_publishable = models.NullBooleanField(editable=False)
    _published = models.NullBooleanField(editable=False)
    _on_track = models.IntegerField(null=True, blank=True, db_index=True, editable=False)
    # Values extracted from the document's Dublin Core by refresh_dc_cache().
    dc_cover_image = models.ForeignKey(
        Image, blank=True, null=True, db_index=True, on_delete=models.SET_NULL, editable=False)
    dc_slug = models.CharField(max_length=128, null=True, blank=True, editable=False, db_index=True)
46
47     class NoTextError(BaseException):
48         pass
49
    class Meta:
        # Model options: the model lives in the `catalogue` app and is listed
        # by title, then slug. The verbose names are Polish ("module"/"modules").
        app_label = 'catalogue'
        ordering = ['title', 'slug']
        verbose_name = u'moduł'
        verbose_name_plural = u'moduły'
55
56     # Representing
57     # ============
58
59     def __iter__(self):
60         return iter(self.chunk_set.all())
61
62     def __getitem__(self, chunk):
63         return self.chunk_set.all()[chunk]
64
65     def __len__(self):
66         return self.chunk_set.count()
67
    def __nonzero__(self):
        """
            Always evaluate a book as true.

            Necessary so that __len__ isn't used for bool evaluation
            (otherwise a book without chunks would be falsy).
            NOTE(review): this is the Python 2 hook; Python 3 looks up
            __bool__ instead.
        """
        return True
73
    def __unicode__(self):
        """Return the book's title as its text representation."""
        return self.title
76
77     @models.permalink
78     def get_absolute_url(self):
79         return "catalogue_book", [self.slug]
80
81     def correct_about(self):
82         return "http://%s%s" % (
83             Site.objects.get_current().domain,
84             self.get_absolute_url()
85         )
86
87     # Creating & manipulating
88     # =======================
89
90     def accessible(self, request):
91         return self.public or request.user.is_authenticated()
92
93     @classmethod
94     @transaction.commit_on_success
95     def create(cls, creator, text, **kwargs):
96         b = cls.objects.create(**kwargs)
97         b.chunk_set.all().update(creator=creator)
98         b[0].commit(text, author=creator)
99         return b
100
101     def add(self, *args, **kwargs):
102         """Add a new chunk at the end."""
103         return self.chunk_set.reverse()[0].split(*args, **kwargs)
104
105     @classmethod
106     @transaction.commit_on_success
107     def import_xml_text(cls, text=u'', previous_book=None, commit_args=None, **kwargs):
108         """Imports a book from XML, splitting it into chunks as necessary."""
109         texts = split_xml(text)
110         if previous_book:
111             instance = previous_book
112         else:
113             instance = cls(**kwargs)
114             instance.save()
115
116         # if there are more parts, set the rest to empty strings
117         book_len = len(instance)
118         for i in range(book_len - len(texts)):
119             texts.append((u'pusta część %d' % (i + 1), u''))
120
121         for i, (title, text) in enumerate(texts):
122             if not title:
123                 title = u'część %d' % (i + 1)
124
125             slug = slughifi(title)
126
127             if i < book_len:
128                 chunk = instance[i]
129                 chunk.slug = slug[:50]
130                 chunk.title = title[:255]
131                 chunk.save()
132             else:
133                 chunk = instance.add(slug, title)
134
135             chunk.commit(text, **commit_args)
136
137         return instance
138
139     def make_chunk_slug(self, proposed):
140         """
141             Finds a chunk slug not yet used in the book.
142         """
143         slugs = set(c.slug for c in self)
144         i = 1
145         new_slug = proposed[:50]
146         while new_slug in slugs:
147             new_slug = "%s_%d" % (proposed[:45], i)
148             i += 1
149         return new_slug
150
151     @transaction.commit_on_success
152     def append(self, other, slugs=None, titles=None):
153         """Add all chunks of another book to self."""
154         assert self != other
155
156         number = self[len(self) - 1].number + 1
157         len_other = len(other)
158         single = len_other == 1
159
160         if slugs is not None:
161             assert len(slugs) == len_other
162         if titles is not None:
163             assert len(titles) == len_other
164             if slugs is None:
165                 slugs = [slughifi(t) for t in titles]
166
167         for i, chunk in enumerate(other):
168             # move chunk to new book
169             chunk.book = self
170             chunk.number = number
171
172             if titles is None:
173                 # try some title guessing
174                 if other.title.startswith(self.title):
175                     other_title_part = other.title[len(self.title):].lstrip(' /')
176                 else:
177                     other_title_part = other.title
178
179                 if single:
180                     # special treatment for appending one-parters:
181                     # just use the guessed title and original book slug
182                     chunk.title = other_title_part
183                     if other.slug.startswith(self.slug):
184                         chunk.slug = other.slug[len(self.slug):].lstrip('-_')
185                     else:
186                         chunk.slug = other.slug
187                 else:
188                     chunk.title = ("%s, %s" % (other_title_part, chunk.title))[:255]
189             else:
190                 chunk.slug = slugs[i]
191                 chunk.title = titles[i]
192
193             chunk.slug = self.make_chunk_slug(chunk.slug)
194             chunk.save()
195             number += 1
196         assert not other.chunk_set.exists()
197
198         gm = GalleryMerger(self.gallery, other.gallery)
199         self.gallery = gm.merge()
200
201         # and move the gallery starts
202         if gm.was_merged:
203             for chunk in self[len(self) - len_other:]:
204                 old_start = chunk.gallery_start or 1
205                 chunk.gallery_start = old_start + gm.dest_size - gm.num_deleted
206                 chunk.save()
207
208         other.delete()
209
210     @transaction.commit_on_success
211     def prepend_history(self, other):
212         """Prepend history from all the other book's chunks to own."""
213         assert self != other
214
215         for i in range(len(self), len(other)):
216             title = u"pusta część %d" % i
217             chunk = self.add(slughifi(title), title)
218             chunk.commit('')
219
220         for i in range(len(other)):
221             self[i].prepend_history(other[0])
222
223         assert not other.chunk_set.exists()
224         other.delete()
225
226     def split(self):
227         """Splits all the chunks into separate books."""
228         for chunk in self:
229             book = Book.objects.create(title=chunk.title, slug=chunk.slug, public=self.public, gallery=self.gallery)
230             book[0].delete()
231             chunk.book = book
232             chunk.number = 1
233             chunk.save()
234         assert not self.chunk_set.exists()
235         self.delete()
236
237     # State & cache
238     # =============
239
240     def last_published(self):
241         try:
242             return self.publish_log.all()[0].timestamp
243         except IndexError:
244             return None
245
246     def assert_publishable(self):
247         assert self.chunk_set.exists(), _('No chunks in the book.')
248         try:
249             changes = self.get_current_changes()
250         except self.NoTextError:
251             raise AssertionError(_('Not all chunks have publishable revisions.'))
252
253         from librarian import NoDublinCore, ParseError, ValidationError
254
255         try:
256             bi = self.wldocument(changes=changes, strict=True).book_info
257             if not bi.audience:
258                 raise ValidationError('No audience specified')
259             if not bi.type:
260                 raise ValidationError('No type specified')
261         except ParseError, e:
262             raise AssertionError(_('Invalid XML') + ': ' + unicode(e))
263         except NoDublinCore:
264             raise AssertionError(_('No Dublin Core found.'))
265         except ValidationError, e:
266             raise AssertionError(_('Invalid Dublin Core') + ': ' + unicode(e))
267
268         valid_about = self.correct_about()
269         assert bi.about == valid_about, _("rdf:about is not") + " " + valid_about
270
271     def publishable_error(self):
272         try:
273             return self.assert_publishable()
274         except AssertionError, e:
275             return e
276
277     def hidden(self):
278         return self.slug.startswith('.')
279
280     def is_new_publishable(self):
281         """Checks if book is ready for publishing.
282
283         Returns True if there is a publishable version newer than the one
284         already published.
285
286         """
287         new_publishable = False
288         if not self.chunk_set.exists():
289             return False
290         for chunk in self:
291             change = chunk.publishable()
292             if not change:
293                 return False
294             if not new_publishable and not change.publish_log.exists():
295                 new_publishable = True
296         return new_publishable
297     new_publishable = cached_in_field('_new_publishable')(is_new_publishable)
298
299     def is_published(self):
300         return self.publish_log.exists()
301     published = cached_in_field('_published')(is_published)
302
303     def get_on_track(self):
304         if self.published:
305             return -1
306         stages = [ch.stage.ordering if ch.stage is not None else 0 for ch in self]
307         if not len(stages):
308             return 0
309         return min(stages)
310     on_track = cached_in_field('_on_track')(get_on_track)
311
312     def is_single(self):
313         return len(self) == 1
314     single = cached_in_field('_single')(is_single)
315
316     @cached_in_field('_short_html')
317     def short_html(self):
318         return render_to_string('catalogue/book_list/book.html', {'book': self})
319
320     def book_info(self, publishable=True):
321         try:
322             book_xml = self.wl1_xml(publishable=publishable)
323         except self.NoTextError:
324             pass
325         else:
326             from librarian.dcparser import BookInfo
327             from librarian import NoDublinCore, ParseError, ValidationError
328             try:
329                 return BookInfo.from_string(book_xml)
330             except (self.NoTextError, ParseError, NoDublinCore, ValidationError):
331                 return None
332
333     def refresh_dc_cache(self):
334         update = {
335             'dc_slug': None,
336             'dc_cover_image': None,
337         }
338
339         info = self.book_info()
340         if info is not None:
341             update['dc_slug'] = info.url.slug
342             if info.cover_source:
343                 try:
344                     image = Image.objects.get(pk=int(info.cover_source.rstrip('/').rsplit('/', 1)[-1]))
345                 except Image.DoesNotExist:
346                     pass
347                 else:
348                     if info.cover_source == image.get_full_url():
349                         update['dc_cover_image'] = image
350         Book.objects.filter(pk=self.pk).update(**update)
351
352     def touch(self):
353         # this should only really be done when text or publishable status changes
354         book_content_updated.delay(self)
355
356         update = {
357             "_new_publishable": self.is_new_publishable(),
358             "_published": self.is_published(),
359             "_single": self.is_single(),
360             "_on_track": self.get_on_track(),
361             "_short_html": None,
362         }
363         Book.objects.filter(pk=self.pk).update(**update)
364         refresh_instance(self)
365
366     def refresh(self):
367         """This should be done offline."""
368         self.short_html
369         self.single
370         self.new_publishable
371         self.published
372
373     # Materializing & publishing
374     # ==========================
375
376     def get_current_changes(self, publishable=True):
377         """
378             Returns a list containing one Change for every Chunk in the Book.
379             Takes the most recent revision (publishable, if set).
380             Throws an error, if a proper revision is unavailable for a Chunk.
381         """
382         if publishable:
383             changes = [chunk.publishable() for chunk in self]
384         else:
385             changes = [chunk.head for chunk in self if chunk.head is not None]
386         if None in changes:
387             raise self.NoTextError('Some chunks have no available text.')
388         return changes
389
390     def materialize(self, publishable=False, changes=None):
391         """
392             Get full text of the document compiled from chunks.
393             Takes the current versions of all texts
394             or versions most recently tagged for publishing,
395             or a specified iterable changes.
396         """
397         if changes is None:
398             changes = self.get_current_changes(publishable)
399         return compile_text(change.materialize() for change in changes)
400
401     def wldocument(self, publishable=True, changes=None, parse_dublincore=True, strict=False):
402         from catalogue.ebook_utils import RedakcjaDocProvider
403         from librarian.parser import WLDocument
404
405         return WLDocument.from_string(
406                 self.wl1_xml(publishable=publishable, changes=changes),
407                 provider=RedakcjaDocProvider(publishable=publishable),
408                 parse_dublincore=parse_dublincore,
409                 strict=strict)
410
411     def publish(self, user, host=None):
412         """
413             Publishes a book on behalf of a (local) user.
414         """
415         import json
416         import os
417         from django.conf import settings
418         self.assert_publishable()
419         changes = self.get_current_changes()
420         data = {"lesson_xml": self.wl1_xml(changes=changes)}
421         if host:
422             gallery_url = u'%s%s%s%s/' % (host, settings.MEDIA_URL, settings.IMAGE_DIR, self.slug)
423             gallery_dir = os.path.join(settings.MEDIA_ROOT, settings.IMAGE_DIR, self.slug)
424             if os.path.isdir(gallery_dir):
425                 data['gallery_url'] = gallery_url
426                 data['attachments'] = json.dumps(os.listdir(gallery_dir))
427         apiclient.api_call(user, "lessons/", data)
428         # record the publish
429         br = BookPublishRecord.objects.create(book=self, user=user)
430         for c in changes:
431             ChunkPublishRecord.objects.create(book_record=br, change=c)
432         post_publish.send(sender=br)
433
434     def wl1_xml(self, publishable=True, changes=None):
435         from lxml import etree
436         import re
437         from StringIO import StringIO
438         from urllib import unquote
439         import os.path
440         from django.conf import settings
441         from fnpdjango.utils.text.slughifi import slughifi
442         from librarian import ParseError, DCNS
443
444         def _register_function(f):
445             """ Register extension function with lxml """
446             ns = etree.FunctionNamespace('http://wolnelektury.pl/functions')
447             ns[f.__name__] = f
448             return f
449
450         @_register_function
451         def slugify(context, text):
452             """Remove unneeded whitespace from beginning and end"""
453             if isinstance(text, list):
454                 text = ''.join(text)
455             return slughifi(text)
456
457         @_register_function
458         def rmext(context, text):
459             if isinstance(text, list):
460                 text = ''.join(text)
461             text = unquote(text)
462             if '.' in text:
463                 name, ext = text.rsplit('.', 1)
464                 if ext.lower() in ('doc', 'docx', 'odt', 'pdf', 'jpg', 'jpeg'):
465                     text = name
466             return text
467
468         t = etree.parse(os.path.join(settings.PROJECT_ROOT, 'xslt/wl2to1.xslt'))
469         ft = self.materialize(publishable=publishable, changes=changes)
470         ft = ft.replace('&nbsp;', ' ')
471         f2 = StringIO(ft)
472         i1 = etree.parse(f2)
473
474         for sect in i1.findall('//section'):
475             if sect[0].text and sect[0].text.strip() == u'Przebieg zajęć':
476                 # Prostujemy.
477                 first = sect.find('section')
478                 subs = first.findall('.//section')
479                 for sub in subs:
480                     sect.append(sub)
481                 break
482         else:
483             # print 'BRAK PRZEBIEGU'
484             dc_type = i1.findall('//dc:type', namespaces={'dc': DCNS.uri})
485             if dc_type and dc_type[0] in ('course', 'synthetic'):
486                 raise ParseError('Brak przebiegu')
487
488         i1.getroot().attrib['redslug'] = self.slug
489         i1.getroot().attrib['wlslug'] = self.slug  # THIS!
490         # print '.',
491         w1t = i1.xslt(t)
492         for h in w1t.findall('//aktywnosc/opis'):
493             if len(h) == 0:
494                 raise ParseError('Pusty element aktywnosc/opis')
495             # FIXME assumption that every lesson has at most 9 parts
496             if not h[0].text or not re.match(r'\d\.\s', h[0].text):
497                 raise ParseError('Niepoprawny nagłówek (aktywnosc/opis): %s' % repr(h[0].text))
498             h[0].text = h[0].text[3:]
499         return etree.tostring(w1t, encoding='utf-8')