remove "we don't support full text search"
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 from django.utils.deconstruct import deconstructible
18 import jsonfield
19 from fnpdjango.storage import BofhFileSystemStorage
20 from ssify import flush_ssi_includes
21 from newtagging import managers
22 from catalogue import constants
23 from catalogue.fields import EbookField
24 from catalogue.models import Tag, Fragment, BookMedia
25 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
26 from catalogue.models.tag import prefetched_relations
27 from catalogue import app_settings
28 from catalogue import tasks
29 from wolnelektury.utils import makedirs
30
# Shared storage instance, passed as `storage=` to file fields declared in this module.
31 bofh_storage = BofhFileSystemStorage()
32
33
@deconstructible
class UploadToPath(object):
    """Callable `upload_to` value that fills a path template with the instance's slug."""

    def __init__(self, path):
        # `path` is a %-style template with a single placeholder for the slug.
        self.path = path

    def __call__(self, instance, filename):
        # `filename` is accepted but not used; only the slug goes into the path.
        slug = instance.slug
        return self.path % slug
41
42
# Upload-path builders for the cover image fields of Book;
# the '%s' placeholder is filled with the book's slug by UploadToPath.
43 _cover_upload_to = UploadToPath('book/cover/%s.jpg')
44 _cover_thumb_upload_to = UploadToPath('book/cover_thumb/%s.jpg')
45 _cover_api_thumb_upload_to = UploadToPath('book/cover_api_thumb/%s.jpg')
46 _simple_cover_upload_to = UploadToPath('book/cover_simple/%s.jpg')
47
48
def _ebook_upload_to(upload_path):
    """Return an UploadToPath callable built from the `upload_path` template."""
    builder = UploadToPath(upload_path)
    return builder
51
52
53 class Book(models.Model):
54     """Represents a book imported from WL-XML."""
55     title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are not editable; both are recomputed in save().
56     sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
57     sort_key_author = models.CharField(
58         _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
59     slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Shared by different versions of the same work; see other_versions().
60     common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
61     language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
62     description = models.TextField(_('description'), blank=True)
63     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
64     changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
65     parent_number = models.IntegerField(_('parent number'), default=0)
    # Metadata dictionary; from_text_and_meta() fills it with book_info.to_dict().
66     extra_info = jsonfield.JSONField(_('extra information'), default={})
67     gazeta_link = models.CharField(blank=True, max_length=240)
68     wiki_link = models.CharField(blank=True, max_length=240)
69     print_on_demand = models.BooleanField(_('print on demand'), default=False)
70     recommended = models.BooleanField(_('recommended'), default=False)
71
72     # files generated during publication
73     cover = EbookField(
74         'cover', _('cover'),
75         null=True, blank=True,
76         upload_to=_cover_upload_to,
77         storage=bofh_storage, max_length=255)
78     # Cleaner version of cover for thumbs
79     cover_thumb = EbookField(
80         'cover_thumb', _('cover thumbnail'),
81         null=True, blank=True,
82         upload_to=_cover_thumb_upload_to,
83         max_length=255)
84     cover_api_thumb = EbookField(
85         'cover_api_thumb', _('cover thumbnail for mobile app'),
86         null=True, blank=True,
87         upload_to=_cover_api_thumb_upload_to,
88         max_length=255)
89     simple_cover = EbookField(
90         'simple_cover', _('cover for mobile app'),
91         null=True, blank=True,
92         upload_to=_simple_cover_upload_to,
93         max_length=255)
    # Every entry of `formats` gets a "<format>_file" field attached by add_file_fields().
94     ebook_formats = constants.EBOOK_FORMATS
95     formats = ebook_formats + ['html', 'xml']
96
97     parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt from `parent` links by repopulate_ancestors().
98     ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)
99
    # Denormalized values, refreshed on every save().
100     cached_author = models.CharField(blank=True, max_length=240, db_index=True)
101     has_audience = models.BooleanField(default=False)
102
103     objects = models.Manager()
104     tagged = managers.ModelTaggedItemManager(Tag)
105     tags = managers.TagDescriptor(Tag)
106     tag_relations = GenericRelation(Tag.intermediary_table_model)
107
    # `published` is sent at the end of from_text_and_meta().
108     html_built = django.dispatch.Signal()
109     published = django.dispatch.Signal()
110
111     short_html_url_name = 'catalogue_book_short'
112
    # Raised by from_text_and_meta() when the book exists and overwrite is false.
113     class AlreadyExists(Exception):
114         pass
115
116     class Meta:
117         ordering = ('sort_key_author', 'sort_key')
118         verbose_name = _('book')
119         verbose_name_plural = _('books')
120         app_label = 'catalogue'
121
122     def __unicode__(self):
123         return self.title
124
125     def get_initial(self):
126         try:
127             return re.search(r'\w', self.title, re.U).group(0)
128         except AttributeError:
129             return ''
130
131     def authors(self):
132         return self.tags.filter(category='author')
133
134     def tag_unicode(self, category):
135         relations = prefetched_relations(self, category)
136         if relations:
137             return ', '.join(rel.tag.name for rel in relations)
138         else:
139             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
140
141     def tags_by_category(self):
142         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
143
144     def author_unicode(self):
145         return self.cached_author
146
147     def translator(self):
148         translators = self.extra_info.get('translators')
149         if not translators:
150             return None
151         if len(translators) > 3:
152             translators = translators[:2]
153             others = ' i inni'
154         else:
155             others = ''
156         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
157
158     def cover_source(self):
159         return self.extra_info.get('cover_source', self.parent.cover_source() if self.parent else '')
160
161     def save(self, force_insert=False, force_update=False, **kwargs):
162         from sortify import sortify
163
164         self.sort_key = sortify(self.title)[:120]
165         self.title = unicode(self.title)  # ???
166
167         try:
168             author = self.authors().first().sort_key
169         except AttributeError:
170             author = u''
171         self.sort_key_author = author
172
173         self.cached_author = self.tag_unicode('author')
174         self.has_audience = 'audience' in self.extra_info
175
176         ret = super(Book, self).save(force_insert, force_update, **kwargs)
177
178         return ret
179
180     @permalink
181     def get_absolute_url(self):
182         return 'catalogue.views.book_detail', [self.slug]
183
184     @staticmethod
185     @permalink
186     def create_url(slug):
187         return 'catalogue.views.book_detail', [slug]
188
189     def gallery_path(self):
190         return gallery_path(self.slug)
191
192     def gallery_url(self):
193         return gallery_url(self.slug)
194
195     @property
196     def name(self):
197         return self.title
198
199     def language_code(self):
200         return constants.LANGUAGES_3TO2.get(self.language, self.language)
201
202     def language_name(self):
203         return dict(settings.LANGUAGES).get(self.language_code(), "")
204
205     def is_foreign(self):
206         return self.language_code() != settings.LANGUAGE_CODE
207
208     def has_media(self, type_):
209         if type_ in Book.formats:
210             return bool(getattr(self, "%s_file" % type_))
211         else:
212             return self.media.filter(type=type_).exists()
213
214     def has_audio(self):
215         return self.has_media('mp3')
216
217     def get_media(self, type_):
218         if self.has_media(type_):
219             if type_ in Book.formats:
220                 return getattr(self, "%s_file" % type_)
221             else:
222                 return self.media.filter(type=type_)
223         else:
224             return None
225
226     def get_mp3(self):
227         return self.get_media("mp3")
228
229     def get_odt(self):
230         return self.get_media("odt")
231
232     def get_ogg(self):
233         return self.get_media("ogg")
234
235     def get_daisy(self):
236         return self.get_media("daisy")
237
238     def has_description(self):
239         return len(self.description) > 0
240     has_description.short_description = _('description')
241     has_description.boolean = True
242
243     # ugly ugly ugly
244     def has_mp3_file(self):
245         return bool(self.has_media("mp3"))
246     has_mp3_file.short_description = 'MP3'
247     has_mp3_file.boolean = True
248
249     def has_ogg_file(self):
250         return bool(self.has_media("ogg"))
251     has_ogg_file.short_description = 'OGG'
252     has_ogg_file.boolean = True
253
254     def has_daisy_file(self):
255         return bool(self.has_media("daisy"))
256     has_daisy_file.short_description = 'DAISY'
257     has_daisy_file.boolean = True
258
259     def get_audiobooks(self):
260         ogg_files = {}
261         for m in self.media.filter(type='ogg').order_by().iterator():
262             ogg_files[m.name] = m
263
264         audiobooks = []
265         projects = set()
266         for mp3 in self.media.filter(type='mp3').iterator():
267             # ogg files are always from the same project
268             meta = mp3.extra_info
269             project = meta.get('project')
270             if not project:
271                 # temporary fallback
272                 project = u'CzytamySłuchając'
273
274             projects.add((project, meta.get('funded_by', '')))
275
276             media = {'mp3': mp3}
277
278             ogg = ogg_files.get(mp3.name)
279             if ogg:
280                 media['ogg'] = ogg
281             audiobooks.append(media)
282
283         projects = sorted(projects)
284         return audiobooks, projects
285
286     def wldocument(self, parse_dublincore=True, inherit=True):
287         from catalogue.import_utils import ORMDocProvider
288         from librarian.parser import WLDocument
289
290         if inherit and self.parent:
291             meta_fallbacks = self.parent.cover_info()
292         else:
293             meta_fallbacks = None
294
295         return WLDocument.from_file(
296             self.xml_file.path,
297             provider=ORMDocProvider(self),
298             parse_dublincore=parse_dublincore,
299             meta_fallbacks=meta_fallbacks)
300
301     @staticmethod
302     def zip_format(format_):
303         def pretty_file_name(book):
304             return "%s/%s.%s" % (
305                 book.extra_info['author'],
306                 book.slug,
307                 format_)
308
309         field_name = "%s_file" % format_
310         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
311         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
312         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
313
314     def zip_audiobooks(self, format_):
315         bm = BookMedia.objects.filter(book=self, type=format_)
316         paths = map(lambda bm: (None, bm.file.path), bm)
317         return create_zip(paths, "%s_%s" % (self.slug, format_))
318
319     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
320         if index is None:
321             from search.index import Index
322             index = Index()
323         try:
324             index.index_book(self, book_info)
325             if index_tags:
326                 index.index_tags()
327             if commit:
328                 index.index.commit()
329         except Exception, e:
330             index.index.rollback()
331             raise e
332
333     def download_pictures(self, remote_gallery_url):
334         gallery_path = self.gallery_path()
335         # delete previous files, so we don't include old files in ebooks
336         if os.path.isdir(gallery_path):
337             for filename in os.listdir(gallery_path):
338                 file_path = os.path.join(gallery_path, filename)
339                 os.unlink(file_path)
340         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
341         if ilustr_elements:
342             makedirs(gallery_path)
343             for ilustr in ilustr_elements:
344                 ilustr_src = ilustr.get('src')
345                 ilustr_path = os.path.join(gallery_path, ilustr_src)
346                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
347
348     @classmethod
349     def from_xml_file(cls, xml_file, **kwargs):
350         from django.core.files import File
351         from librarian import dcparser
352
353         # use librarian to parse meta-data
354         book_info = dcparser.parse(xml_file)
355
356         if not isinstance(xml_file, File):
357             xml_file = File(open(xml_file))
358
359         try:
360             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
361         finally:
362             xml_file.close()
363
    # Create or update a Book from an XML file object (raw_file) and its parsed
    # metadata (book_info): stores the XML and metadata, assigns tags, re-links
    # child books, schedules cover/HTML/ebook builds and optional search
    # indexing, then sends the `published` signal and returns the book.
    # Raises Book.DoesNotExist when a listed part is missing, ValueError for a
    # slug with invalid characters, and Book.AlreadyExists when the book exists
    # and `overwrite` is false.
364     @classmethod
365     def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
366                            search_index_tags=True, remote_gallery_url=None):
367         if dont_build is None:
368             dont_build = set()
        # Formats listed in app_settings.DONT_BUILD are always skipped.
369         dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))
370
371         # check for parts before we do anything
372         children = []
373         if hasattr(book_info, 'parts'):
374             for part_url in book_info.parts:
375                 try:
376                     children.append(Book.objects.get(slug=part_url.slug))
377                 except Book.DoesNotExist:
378                     raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)
379
380         # Read book metadata
381         book_slug = book_info.url.slug
382         if re.search(r'[^a-z0-9-]', book_slug):
383             raise ValueError('Invalid characters in slug')
384         book, created = Book.objects.get_or_create(slug=book_slug)
385
386         if created:
387             book_shelves = []
388             old_cover = None
389         else:
390             if not overwrite:
391                 raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
392             # Save shelves for this book
393             book_shelves = list(book.tags.filter(category='set'))
394             old_cover = book.cover_info()
395
396         # Save XML file
397         book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)
398
399         book.language = book_info.language
400         book.title = book_info.title
401         if book_info.variant_of:
402             book.common_slug = book_info.variant_of.slug
403         else:
404             book.common_slug = book.slug
405         book.extra_info = book_info.to_dict()
406         book.save()
407
408         meta_tags = Tag.tags_from_info(book_info)
409
        # Mark every metadata tag as used for books.
410         for tag in meta_tags:
411             if not tag.for_books:
412                 tag.for_books = True
413                 tag.save()
414
        # Replace the book's tags with the metadata tags plus the preserved shelves.
415         book.tags = set(meta_tags + book_shelves)
416
        # old_cover is None for a newly created book, so cover_changed is then true.
417         cover_changed = old_cover != book.cover_info()
418         obsolete_children = set(b for b in book.children.all()
419                                 if b not in children)
420         notify_cover_changed = []
421         for n, child_book in enumerate(children):
422             new_child = child_book.parent != book
423             child_book.parent = book
424             child_book.parent_number = n
425             child_book.save()
426             if new_child or cover_changed:
427                 notify_cover_changed.append(child_book)
428         # Disown unfaithful children and let them cope on their own.
429         for child in obsolete_children:
430             child.parent = None
431             child.parent_number = 0
432             child.save()
433             if old_cover:
434                 notify_cover_changed.append(child)
435
436         cls.repopulate_ancestors()
437         tasks.update_counters.delay()
438
439         if remote_gallery_url:
440             book.download_pictures(remote_gallery_url)
441
442         # No saves beyond this point.
        # NOTE(review): book.save() is still called near the end of this method.
443
444         # Build cover.
445         if 'cover' not in dont_build:
446             book.cover.build_delay()
447             book.cover_thumb.build_delay()
448             book.cover_api_thumb.build_delay()
449             book.simple_cover.build_delay()
450
451         # Build HTML and ebooks.
452         book.html_file.build_delay()
453         if not children:
454             for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
455                 if format_ not in dont_build:
456                     getattr(book, '%s_file' % format_).build_delay()
457         for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
458             if format_ not in dont_build:
459                 getattr(book, '%s_file' % format_).build_delay()
460
461         if not settings.NO_SEARCH_INDEX and search_index:
462             tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)
463
464         for child in notify_cover_changed:
465             child.parent_cover_changed()
466
467         book.save()  # update sort_key_author
468         book.update_popularity()
469         cls.published.send(sender=cls, instance=book)
470         return book
471
472     @classmethod
473     @transaction.atomic
474     def repopulate_ancestors(cls):
475         """Fixes the ancestry cache."""
476         # TODO: table names
477         cursor = connection.cursor()
478         if connection.vendor == 'postgres':
479             cursor.execute("TRUNCATE catalogue_book_ancestor")
480             cursor.execute("""
481                 WITH RECURSIVE ancestry AS (
482                     SELECT book.id, book.parent_id
483                     FROM catalogue_book AS book
484                     WHERE book.parent_id IS NOT NULL
485                     UNION
486                     SELECT ancestor.id, book.parent_id
487                     FROM ancestry AS ancestor, catalogue_book AS book
488                     WHERE ancestor.parent_id = book.id
489                         AND book.parent_id IS NOT NULL
490                     )
491                 INSERT INTO catalogue_book_ancestor
492                     (from_book_id, to_book_id)
493                     SELECT id, parent_id
494                     FROM ancestry
495                     ORDER BY id;
496                 """)
497         else:
498             cursor.execute("DELETE FROM catalogue_book_ancestor")
499             for b in cls.objects.exclude(parent=None):
500                 parent = b.parent
501                 while parent is not None:
502                     b.ancestor.add(parent)
503                     parent = parent.parent
504
505     def flush_includes(self, languages=True):
506         if not languages:
507             return
508         if languages is True:
509             languages = [lc for (lc, _ln) in settings.LANGUAGES]
510         flush_ssi_includes([
511             template % (self.pk, lang)
512             for template in [
513                 '/katalog/b/%d/mini.%s.html',
514                 '/katalog/b/%d/mini_nolink.%s.html',
515                 '/katalog/b/%d/short.%s.html',
516                 '/katalog/b/%d/wide.%s.html',
517                 '/api/include/book/%d.%s.json',
518                 '/api/include/book/%d.%s.xml',
519                 ]
520             for lang in languages
521             ])
522
523     def cover_info(self, inherit=True):
524         """Returns a dictionary to serve as fallback for BookInfo.
525
526         For now, the only thing inherited is the cover image.
527         """
528         need = False
529         info = {}
530         for field in ('cover_url', 'cover_by', 'cover_source'):
531             val = self.extra_info.get(field)
532             if val:
533                 info[field] = val
534             else:
535                 need = True
536         if inherit and need and self.parent is not None:
537             parent_info = self.parent.cover_info()
538             parent_info.update(info)
539             info = parent_info
540         return info
541
542     def related_themes(self):
543         return Tag.objects.usage_for_queryset(
544             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
545             counts=True).filter(category='theme')
546
547     def parent_cover_changed(self):
548         """Called when parent book's cover image is changed."""
549         if not self.cover_info(inherit=False):
550             if 'cover' not in app_settings.DONT_BUILD:
551                 self.cover.build_delay()
552                 self.cover_thumb.build_delay()
553                 self.cover_api_thumb.build_delay()
554                 self.simple_cover.build_delay()
555             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
556                 if format_ not in app_settings.DONT_BUILD:
557                     getattr(self, '%s_file' % format_).build_delay()
558             for child in self.children.all():
559                 child.parent_cover_changed()
560
561     def other_versions(self):
562         """Find other versions (i.e. in other languages) of the book."""
563         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
564
565     def parents(self):
566         books = []
567         parent = self.parent
568         while parent is not None:
569             books.insert(0, parent)
570             parent = parent.parent
571         return books
572
573     def pretty_title(self, html_links=False):
574         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
575         books = self.parents() + [self]
576         names.extend([(b.title, b.get_absolute_url()) for b in books])
577
578         if html_links:
579             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
580         else:
581             names = [tag[0] for tag in names]
582         return ', '.join(names)
583
584     def publisher(self):
585         publisher = self.extra_info['publisher']
586         if isinstance(publisher, basestring):
587             return publisher
588         elif isinstance(publisher, list):
589             return ', '.join(publisher)
590
591     @classmethod
592     def tagged_top_level(cls, tags):
593         """ Returns top-level books tagged with `tags`.
594
595         It only returns those books which don't have ancestors which are
596         also tagged with those tags.
597
598         """
599         objects = cls.tagged.with_all(tags)
600         return objects.exclude(ancestor__in=objects)
601
602     @classmethod
603     def book_list(cls, book_filter=None):
604         """Generates a hierarchical listing of all books.
605
606         Books are optionally filtered with a test function.
607
608         """
609
610         books_by_parent = {}
611         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
612         if book_filter:
613             books = books.filter(book_filter).distinct()
614
615             book_ids = set(b['pk'] for b in books.values("pk").iterator())
616             for book in books.iterator():
617                 parent = book.parent_id
618                 if parent not in book_ids:
619                     parent = None
620                 books_by_parent.setdefault(parent, []).append(book)
621         else:
622             for book in books.iterator():
623                 books_by_parent.setdefault(book.parent_id, []).append(book)
624
625         orphans = []
626         books_by_author = OrderedDict()
627         for tag in Tag.objects.filter(category='author').iterator():
628             books_by_author[tag] = []
629
630         for book in books_by_parent.get(None, ()):
631             authors = list(book.authors().only('pk'))
632             if authors:
633                 for author in authors:
634                     books_by_author[author].append(book)
635             else:
636                 orphans.append(book)
637
638         return books_by_author, orphans, books_by_parent
639
640     _audiences_pl = {
641         "SP": (1, u"szkoła podstawowa"),
642         "SP1": (1, u"szkoła podstawowa"),
643         "SP2": (1, u"szkoła podstawowa"),
644         "SP3": (1, u"szkoła podstawowa"),
645         "P": (1, u"szkoła podstawowa"),
646         "G": (2, u"gimnazjum"),
647         "L": (3, u"liceum"),
648         "LP": (3, u"liceum"),
649     }
650
651     def audiences_pl(self):
652         audiences = self.extra_info.get('audiences', [])
653         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
654         return [a[1] for a in audiences]
655
656     def stage_note(self):
657         stage = self.extra_info.get('stage')
658         if stage and stage < '0.4':
659             return (_('This work needs modernisation'),
660                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
661         else:
662             return None, None
663
664     def choose_fragment(self):
665         fragments = self.fragments.order_by()
666         fragments_count = fragments.count()
667         if not fragments_count and self.children.exists():
668             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
669             fragments_count = fragments.count()
670         if fragments_count:
671             return fragments[randint(0, fragments_count - 1)]
672         elif self.parent:
673             return self.parent.choose_fragment()
674         else:
675             return None
676
677     def fragment_data(self):
678         fragment = self.choose_fragment()
679         if fragment:
680             return {'title': fragment.book.pretty_title(), 'html': fragment.get_short_text()}
681         else:
682             return None
683
684     def update_popularity(self):
685         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
686         try:
687             pop = self.popularity
688             pop.count = count
689             pop.save()
690         except BookPopularity.DoesNotExist:
691             BookPopularity.objects.create(book=self, count=count)
692
693     def ridero_link(self):
694         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
695
696
def add_file_fields():
    """Attach a "<format>_file" EbookField to Book for every format in Book.formats."""
    for format_ in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable.
        upload_to_name = '_%s_upload_to' % format_
        _upload_to = _ebook_upload_to('book/%s/%%s.%s' % (format_, format_))
        _upload_to.__name__ = upload_to_name
        globals()[upload_to_name] = _upload_to

        file_field = EbookField(
            format_, _("%s file" % format_.upper()),
            upload_to=_upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        file_field.contribute_to_class(Book, "%s_file" % format_)


add_file_fields()
715
716
class BookPopularity(models.Model):
    """Popularity counter kept one-to-one with a Book; written by Book.update_popularity()."""

    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0, db_index=True)