revamp search hints
[wolnelektury.git] / src / catalogue / models / book.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from collections import OrderedDict
6 from random import randint
7 import os.path
8 import re
9 import urllib
10 from django.conf import settings
11 from django.db import connection, models, transaction
12 from django.db.models import permalink
13 import django.dispatch
14 from django.contrib.contenttypes.fields import GenericRelation
15 from django.core.urlresolvers import reverse
16 from django.utils.translation import ugettext_lazy as _, get_language
17 import jsonfield
18 from fnpdjango.storage import BofhFileSystemStorage
19 from ssify import flush_ssi_includes
20 from newtagging import managers
21 from catalogue import constants
22 from catalogue.fields import EbookField
23 from catalogue.models import Tag, Fragment, BookMedia
24 from catalogue.utils import create_zip, gallery_url, gallery_path, split_tags
25 from catalogue.models.tag import prefetched_relations
26 from catalogue import app_settings
27 from catalogue import tasks
28 from wolnelektury.utils import makedirs
29
# Shared storage instance, passed as `storage=` to the cover field and to the
# per-format file fields attached by add_file_fields().
bofh_storage = BofhFileSystemStorage()
31
32
def _make_upload_to(path):
    """Build an ``upload_to`` callable from a ``%s``-style path template.

    The returned function accepts two positional arguments (instance and
    file name), ignores the file name, and returns `path` formatted with
    the instance's slug.
    """
    def _upload_to(instance, filename):
        target = path % instance.slug
        return target
    return _upload_to
37
38
# upload_to callables for the `cover` and `cover_thumb` fields of Book;
# the target file name is built from the book's slug.
_cover_upload_to = _make_upload_to('book/cover/%s.jpg')
_cover_thumb_upload_to = _make_upload_to('book/cover_thumb/%s.jpg')
41
42
def _ebook_upload_to(upload_path):
    """Return an ``upload_to`` callable for the given ebook path template."""
    upload_to = _make_upload_to(upload_path)
    return upload_to
45
46
class Book(models.Model):
    """Represents a book imported from WL-XML."""
    title = models.CharField(_('title'), max_length=32767)
    # sort_key and sort_key_author are derived values, recomputed in save().
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Set in from_text_and_meta(): the slug of the book this one is a variant
    # of, or the book's own slug. other_versions() matches books on it.
    # NOTE(review): the verbose name repeats 'slug' from the field above.
    common_slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    language = models.CharField(_('language code'), max_length=3, db_index=True, default=app_settings.DEFAULT_LANGUAGE)
    description = models.TextField(_('description'), blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    changed_at = models.DateTimeField(_('change date'), auto_now=True, db_index=True)
    # Position among the parent's children; assigned in from_text_and_meta().
    parent_number = models.IntegerField(_('parent number'), default=0)
    # Filled from book_info.to_dict() in from_text_and_meta().
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    gazeta_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)
    print_on_demand = models.BooleanField(_('print on demand'), default=False)
    recommended = models.BooleanField(_('recommended'), default=False)

    # files generated during publication
    cover = EbookField(
        'cover', _('cover'),
        null=True, blank=True,
        upload_to=_cover_upload_to,
        storage=bofh_storage, max_length=255)
    # Cleaner version of cover for thumbs
    cover_thumb = EbookField(
        'cover_thumb', _('cover thumbnail'),
        null=True, blank=True,
        upload_to=_cover_thumb_upload_to,
        max_length=255)
    # For every entry in `formats`, add_file_fields() (module level) attaches
    # a '<format>_file' field to this class.
    ebook_formats = constants.EBOOK_FORMATS
    formats = ebook_formats + ['html', 'xml']

    parent = models.ForeignKey('self', blank=True, null=True, related_name='children')
    # Ancestry cache; rebuilt from the `parent` links by repopulate_ancestors().
    ancestor = models.ManyToManyField('self', blank=True, editable=False, related_name='descendant', symmetrical=False)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(Tag)
    tags = managers.TagDescriptor(Tag)
    tag_relations = GenericRelation(Tag.intermediary_table_model)

    # Signals. `published` is sent at the end of from_text_and_meta();
    # `html_built` is not sent anywhere in this module.
    html_built = django.dispatch.Signal()
    published = django.dispatch.Signal()

    short_html_url_name = 'catalogue_book_short'

    class AlreadyExists(Exception):
        """Raised by from_text_and_meta() when the book exists and overwrite is off."""
        pass

    class Meta:
        ordering = ('sort_key_author', 'sort_key')
        verbose_name = _('book')
        verbose_name_plural = _('books')
        app_label = 'catalogue'
102
103     def __unicode__(self):
104         return self.title
105
106     def get_initial(self):
107         try:
108             return re.search(r'\w', self.title, re.U).group(0)
109         except AttributeError:
110             return ''
111
112     def authors(self):
113         return self.tags.filter(category='author')
114
115     def tag_unicode(self, category):
116         relations = prefetched_relations(self, category)
117         if relations:
118             return ', '.join(rel.tag.name for rel in relations)
119         else:
120             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
121
122     def tags_by_category(self):
123         return split_tags(self.tags.exclude(category__in=('set', 'theme')))
124
125     def author_unicode(self):
126         return self.tag_unicode('author')
127
128     def translator(self):
129         translators = self.extra_info.get('translators')
130         if not translators:
131             return None
132         if len(translators) > 3:
133             translators = translators[:2]
134             others = ' i inni'
135         else:
136             others = ''
137         return ', '.join(u'\xa0'.join(reversed(translator.split(', ', 1))) for translator in translators) + others
138
139     def save(self, force_insert=False, force_update=False, **kwargs):
140         from sortify import sortify
141
142         self.sort_key = sortify(self.title)[:120]
143         self.title = unicode(self.title)  # ???
144
145         try:
146             author = self.authors().first().sort_key
147         except AttributeError:
148             author = u''
149         self.sort_key_author = author
150
151         ret = super(Book, self).save(force_insert, force_update, **kwargs)
152
153         return ret
154
155     @permalink
156     def get_absolute_url(self):
157         return 'catalogue.views.book_detail', [self.slug]
158
159     @staticmethod
160     @permalink
161     def create_url(slug):
162         return 'catalogue.views.book_detail', [slug]
163
164     def gallery_path(self):
165         return gallery_path(self.slug)
166
167     def gallery_url(self):
168         return gallery_url(self.slug)
169
170     @property
171     def name(self):
172         return self.title
173
174     def language_code(self):
175         return constants.LANGUAGES_3TO2.get(self.language, self.language)
176
177     def language_name(self):
178         return dict(settings.LANGUAGES).get(self.language_code(), "")
179
180     def is_foreign(self):
181         return self.language_code() != settings.LANGUAGE_CODE
182
183     def has_media(self, type_):
184         if type_ in Book.formats:
185             return bool(getattr(self, "%s_file" % type_))
186         else:
187             return self.media.filter(type=type_).exists()
188
189     def get_media(self, type_):
190         if self.has_media(type_):
191             if type_ in Book.formats:
192                 return getattr(self, "%s_file" % type_)
193             else:
194                 return self.media.filter(type=type_)
195         else:
196             return None
197
198     def get_mp3(self):
199         return self.get_media("mp3")
200
201     def get_odt(self):
202         return self.get_media("odt")
203
204     def get_ogg(self):
205         return self.get_media("ogg")
206
207     def get_daisy(self):
208         return self.get_media("daisy")
209
210     def has_description(self):
211         return len(self.description) > 0
212     has_description.short_description = _('description')
213     has_description.boolean = True
214
215     # ugly ugly ugly
216     def has_mp3_file(self):
217         return bool(self.has_media("mp3"))
218     has_mp3_file.short_description = 'MP3'
219     has_mp3_file.boolean = True
220
221     def has_ogg_file(self):
222         return bool(self.has_media("ogg"))
223     has_ogg_file.short_description = 'OGG'
224     has_ogg_file.boolean = True
225
226     def has_daisy_file(self):
227         return bool(self.has_media("daisy"))
228     has_daisy_file.short_description = 'DAISY'
229     has_daisy_file.boolean = True
230
231     def get_audiobooks(self):
232         ogg_files = {}
233         for m in self.media.filter(type='ogg').order_by().iterator():
234             ogg_files[m.name] = m
235
236         audiobooks = []
237         projects = set()
238         for mp3 in self.media.filter(type='mp3').iterator():
239             # ogg files are always from the same project
240             meta = mp3.extra_info
241             project = meta.get('project')
242             if not project:
243                 # temporary fallback
244                 project = u'CzytamySłuchając'
245
246             projects.add((project, meta.get('funded_by', '')))
247
248             media = {'mp3': mp3}
249
250             ogg = ogg_files.get(mp3.name)
251             if ogg:
252                 media['ogg'] = ogg
253             audiobooks.append(media)
254
255         projects = sorted(projects)
256         return audiobooks, projects
257
258     def wldocument(self, parse_dublincore=True, inherit=True):
259         from catalogue.import_utils import ORMDocProvider
260         from librarian.parser import WLDocument
261
262         if inherit and self.parent:
263             meta_fallbacks = self.parent.cover_info()
264         else:
265             meta_fallbacks = None
266
267         return WLDocument.from_file(
268             self.xml_file.path,
269             provider=ORMDocProvider(self),
270             parse_dublincore=parse_dublincore,
271             meta_fallbacks=meta_fallbacks)
272
273     @staticmethod
274     def zip_format(format_):
275         def pretty_file_name(book):
276             return "%s/%s.%s" % (
277                 book.extra_info['author'],
278                 book.slug,
279                 format_)
280
281         field_name = "%s_file" % format_
282         books = Book.objects.filter(parent=None).exclude(**{field_name: ""})
283         paths = [(pretty_file_name(b), getattr(b, field_name).path) for b in books.iterator()]
284         return create_zip(paths, app_settings.FORMAT_ZIPS[format_])
285
286     def zip_audiobooks(self, format_):
287         bm = BookMedia.objects.filter(book=self, type=format_)
288         paths = map(lambda bm: (None, bm.file.path), bm)
289         return create_zip(paths, "%s_%s" % (self.slug, format_))
290
291     def search_index(self, book_info=None, index=None, index_tags=True, commit=True):
292         if index is None:
293             from search.index import Index
294             index = Index()
295         try:
296             index.index_book(self, book_info)
297             if index_tags:
298                 index.index_tags()
299             if commit:
300                 index.index.commit()
301         except Exception, e:
302             index.index.rollback()
303             raise e
304
305     def download_pictures(self, remote_gallery_url):
306         gallery_path = self.gallery_path()
307         # delete previous files, so we don't include old files in ebooks
308         if os.path.isdir(gallery_path):
309             for filename in os.listdir(gallery_path):
310                 file_path = os.path.join(gallery_path, filename)
311                 os.unlink(file_path)
312         ilustr_elements = list(self.wldocument().edoc.findall('//ilustr'))
313         if ilustr_elements:
314             makedirs(gallery_path)
315             for ilustr in ilustr_elements:
316                 ilustr_src = ilustr.get('src')
317                 ilustr_path = os.path.join(gallery_path, ilustr_src)
318                 urllib.urlretrieve('%s/%s' % (remote_gallery_url, ilustr_src), ilustr_path)
319
320     @classmethod
321     def from_xml_file(cls, xml_file, **kwargs):
322         from django.core.files import File
323         from librarian import dcparser
324
325         # use librarian to parse meta-data
326         book_info = dcparser.parse(xml_file)
327
328         if not isinstance(xml_file, File):
329             xml_file = File(open(xml_file))
330
331         try:
332             return cls.from_text_and_meta(xml_file, book_info, **kwargs)
333         finally:
334             xml_file.close()
335
    @classmethod
    def from_text_and_meta(cls, raw_file, book_info, overwrite=False, dont_build=None, search_index=True,
                           search_index_tags=True, remote_gallery_url=None):
        """Create or update a book from its XML file and parsed metadata.

        :param raw_file: file object saved as the book's XML file
        :param book_info: parsed metadata; read here for url.slug, language,
            title, variant_of, to_dict() and, if present, parts
        :param overwrite: allow updating a book that already exists
        :param dont_build: iterable of build names ('cover' or a format
            name) to skip; merged with app_settings.DONT_BUILD
        :param search_index: queue search indexing (unless
            settings.NO_SEARCH_INDEX is set)
        :param search_index_tags: passed to the indexing task as index_tags
        :param remote_gallery_url: when given, pictures are downloaded from it
        :return: the created or updated Book
        :raises Book.DoesNotExist: a part listed in book_info.parts is missing
        :raises ValueError: the slug has characters other than a-z, 0-9, '-'
        :raises Book.AlreadyExists: the book exists and `overwrite` is false
        """
        if dont_build is None:
            dont_build = set()
        dont_build = set.union(set(dont_build), set(app_settings.DONT_BUILD))

        # check for parts before we do anything
        children = []
        if hasattr(book_info, 'parts'):
            for part_url in book_info.parts:
                try:
                    children.append(Book.objects.get(slug=part_url.slug))
                except Book.DoesNotExist:
                    raise Book.DoesNotExist(_('Book "%s" does not exist.') % part_url.slug)

        # Read book metadata
        book_slug = book_info.url.slug
        if re.search(r'[^a-z0-9-]', book_slug):
            raise ValueError('Invalid characters in slug')
        book, created = Book.objects.get_or_create(slug=book_slug)

        if created:
            book_shelves = []
            old_cover = None
        else:
            if not overwrite:
                raise Book.AlreadyExists(_('Book %s already exists') % book_slug)
            # Save shelves for this book
            # ('set' tags are kept, because the tags are replaced below).
            book_shelves = list(book.tags.filter(category='set'))
            old_cover = book.cover_info()

        # Save XML file
        book.xml_file.save('%s.xml' % book.slug, raw_file, save=False)

        book.language = book_info.language
        book.title = book_info.title
        if book_info.variant_of:
            book.common_slug = book_info.variant_of.slug
        else:
            book.common_slug = book.slug
        book.extra_info = book_info.to_dict()
        book.save()

        meta_tags = Tag.tags_from_info(book_info)

        # Replace the tags: metadata tags plus the shelves saved above.
        book.tags = set(meta_tags + book_shelves)

        cover_changed = old_cover != book.cover_info()
        # Current children that the new metadata no longer lists as parts.
        obsolete_children = set(b for b in book.children.all()
                                if b not in children)
        notify_cover_changed = []
        for n, child_book in enumerate(children):
            new_child = child_book.parent != book
            child_book.parent = book
            child_book.parent_number = n
            child_book.save()
            if new_child or cover_changed:
                notify_cover_changed.append(child_book)
        # Disown unfaithful children and let them cope on their own.
        for child in obsolete_children:
            child.parent = None
            child.parent_number = 0
            child.save()
            if old_cover:
                notify_cover_changed.append(child)

        cls.repopulate_ancestors()
        tasks.update_counters.delay()

        if remote_gallery_url:
            book.download_pictures(remote_gallery_url)

        # No saves beyond this point (apart from the final book.save()
        # below, which refreshes sort_key_author).

        # Build cover.
        if 'cover' not in dont_build:
            book.cover.build_delay()
            book.cover_thumb.build_delay()

        # Build HTML and ebooks.
        book.html_file.build_delay()
        if not children:
            for format_ in constants.EBOOK_FORMATS_WITHOUT_CHILDREN:
                if format_ not in dont_build:
                    getattr(book, '%s_file' % format_).build_delay()
        for format_ in constants.EBOOK_FORMATS_WITH_CHILDREN:
            if format_ not in dont_build:
                getattr(book, '%s_file' % format_).build_delay()

        if not settings.NO_SEARCH_INDEX and search_index:
            tasks.index_book.delay(book.id, book_info=book_info, index_tags=search_index_tags)

        for child in notify_cover_changed:
            child.parent_cover_changed()

        book.save()  # update sort_key_author
        cls.published.send(sender=cls, instance=book)
        return book
435
436     @classmethod
437     @transaction.atomic
438     def repopulate_ancestors(cls):
439         """Fixes the ancestry cache."""
440         # TODO: table names
441         cursor = connection.cursor()
442         if connection.vendor == 'postgres':
443             cursor.execute("TRUNCATE catalogue_book_ancestor")
444             cursor.execute("""
445                 WITH RECURSIVE ancestry AS (
446                     SELECT book.id, book.parent_id
447                     FROM catalogue_book AS book
448                     WHERE book.parent_id IS NOT NULL
449                     UNION
450                     SELECT ancestor.id, book.parent_id
451                     FROM ancestry AS ancestor, catalogue_book AS book
452                     WHERE ancestor.parent_id = book.id
453                         AND book.parent_id IS NOT NULL
454                     )
455                 INSERT INTO catalogue_book_ancestor
456                     (from_book_id, to_book_id)
457                     SELECT id, parent_id
458                     FROM ancestry
459                     ORDER BY id;
460                 """)
461         else:
462             cursor.execute("DELETE FROM catalogue_book_ancestor")
463             for b in cls.objects.exclude(parent=None):
464                 parent = b.parent
465                 while parent is not None:
466                     b.ancestor.add(parent)
467                     parent = parent.parent
468
469     def flush_includes(self, languages=True):
470         if not languages:
471             return
472         if languages is True:
473             languages = [lc for (lc, _ln) in settings.LANGUAGES]
474         flush_ssi_includes([
475             template % (self.pk, lang)
476             for template in [
477                 '/katalog/b/%d/mini.%s.html',
478                 '/katalog/b/%d/mini_nolink.%s.html',
479                 '/katalog/b/%d/short.%s.html',
480                 '/katalog/b/%d/wide.%s.html',
481                 '/api/include/book/%d.%s.json',
482                 '/api/include/book/%d.%s.xml',
483                 ]
484             for lang in languages
485             ])
486
487     def cover_info(self, inherit=True):
488         """Returns a dictionary to serve as fallback for BookInfo.
489
490         For now, the only thing inherited is the cover image.
491         """
492         need = False
493         info = {}
494         for field in ('cover_url', 'cover_by', 'cover_source'):
495             val = self.extra_info.get(field)
496             if val:
497                 info[field] = val
498             else:
499                 need = True
500         if inherit and need and self.parent is not None:
501             parent_info = self.parent.cover_info()
502             parent_info.update(info)
503             info = parent_info
504         return info
505
506     def related_themes(self):
507         return Tag.objects.usage_for_queryset(
508             Fragment.objects.filter(models.Q(book=self) | models.Q(book__ancestor=self)),
509             counts=True).filter(category='theme')
510
511     def parent_cover_changed(self):
512         """Called when parent book's cover image is changed."""
513         if not self.cover_info(inherit=False):
514             if 'cover' not in app_settings.DONT_BUILD:
515                 self.cover.build_delay()
516                 self.cover_thumb.build_delay()
517             for format_ in constants.EBOOK_FORMATS_WITH_COVERS:
518                 if format_ not in app_settings.DONT_BUILD:
519                     getattr(self, '%s_file' % format_).build_delay()
520             for child in self.children.all():
521                 child.parent_cover_changed()
522
523     def other_versions(self):
524         """Find other versions (i.e. in other languages) of the book."""
525         return type(self).objects.filter(common_slug=self.common_slug).exclude(pk=self.pk)
526
527     def parents(self):
528         books = []
529         parent = self.parent
530         while parent is not None:
531             books.insert(0, parent)
532             parent = parent.parent
533         return books
534
535     def pretty_title(self, html_links=False):
536         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
537         books = self.parents() + [self]
538         names.extend([(b.title, b.get_absolute_url()) for b in books])
539
540         if html_links:
541             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
542         else:
543             names = [tag[0] for tag in names]
544         return ', '.join(names)
545
546     def publisher(self):
547         publisher = self.extra_info['publisher']
548         if isinstance(publisher, basestring):
549             return publisher
550         elif isinstance(publisher, list):
551             return ', '.join(publisher)
552
553     @classmethod
554     def tagged_top_level(cls, tags):
555         """ Returns top-level books tagged with `tags`.
556
557         It only returns those books which don't have ancestors which are
558         also tagged with those tags.
559
560         """
561         objects = cls.tagged.with_all(tags)
562         return objects.exclude(ancestor__in=objects)
563
564     @classmethod
565     def book_list(cls, book_filter=None):
566         """Generates a hierarchical listing of all books.
567
568         Books are optionally filtered with a test function.
569
570         """
571
572         books_by_parent = {}
573         books = cls.objects.order_by('parent_number', 'sort_key').only('title', 'parent', 'slug')
574         if book_filter:
575             books = books.filter(book_filter).distinct()
576
577             book_ids = set(b['pk'] for b in books.values("pk").iterator())
578             for book in books.iterator():
579                 parent = book.parent_id
580                 if parent not in book_ids:
581                     parent = None
582                 books_by_parent.setdefault(parent, []).append(book)
583         else:
584             for book in books.iterator():
585                 books_by_parent.setdefault(book.parent_id, []).append(book)
586
587         orphans = []
588         books_by_author = OrderedDict()
589         for tag in Tag.objects.filter(category='author').iterator():
590             books_by_author[tag] = []
591
592         for book in books_by_parent.get(None, ()):
593             authors = list(book.authors().only('pk'))
594             if authors:
595                 for author in authors:
596                     books_by_author[author].append(book)
597             else:
598                 orphans.append(book)
599
600         return books_by_author, orphans, books_by_parent
601
    # Maps an audience code to a (sort rank, Polish label) pair; used by
    # audiences_pl(), which sorts by the pair and returns the labels.
    _audiences_pl = {
        "SP": (1, u"szkoła podstawowa"),
        "SP1": (1, u"szkoła podstawowa"),
        "SP2": (1, u"szkoła podstawowa"),
        "SP3": (1, u"szkoła podstawowa"),
        "P": (1, u"szkoła podstawowa"),
        "G": (2, u"gimnazjum"),
        "L": (3, u"liceum"),
        "LP": (3, u"liceum"),
    }
612
613     def audiences_pl(self):
614         audiences = self.extra_info.get('audiences', [])
615         audiences = sorted(set([self._audiences_pl.get(a, (99, a)) for a in audiences]))
616         return [a[1] for a in audiences]
617
618     def stage_note(self):
619         stage = self.extra_info.get('stage')
620         if stage and stage < '0.4':
621             return (_('This work needs modernisation'),
622                     reverse('infopage', args=['wymagajace-uwspolczesnienia']))
623         else:
624             return None, None
625
626     def choose_fragment(self):
627         fragments = self.fragments.order_by()
628         fragments_count = fragments.count()
629         if not fragments_count and self.children.exists():
630             fragments = Fragment.objects.filter(book__ancestor=self).order_by()
631             fragments_count = fragments.count()
632         if fragments_count:
633             return fragments[randint(0, fragments_count - 1)]
634         elif self.parent:
635             return self.parent.choose_fragment()
636         else:
637             return None
638
639     def update_popularity(self):
640         count = self.tags.filter(category='set').values('user').order_by('user').distinct().count()
641         try:
642             pop = self.popularity
643             pop.count = count
644             pop.save()
645         except BookPopularity.DoesNotExist:
646             BookPopularity.objects.create(book=self, count=count)
647
648     def ridero_link(self):
649         return 'https://ridero.eu/%s/books/wl_%s/' % (get_language(), self.slug.replace('-', '_'))
650
651
def add_file_fields():
    """Attach a '<format>_file' EbookField to Book for every entry in Book.formats.

    Each field gets its own upload_to callable storing the file as
    'book/<format>/<slug>.<format>'.
    """
    for fmt in Book.formats:
        # This weird globals() assignment makes Django migrations comfortable:
        # every callable is published in this module under its own name.
        upload_to = _ebook_upload_to('book/%s/%%s.%s' % (fmt, fmt))
        upload_to.__name__ = '_%s_upload_to' % fmt
        globals()[upload_to.__name__] = upload_to

        field = EbookField(
            fmt, _("%s file" % fmt.upper()),
            upload_to=upload_to,
            storage=bofh_storage,
            max_length=255,
            blank=True,
            default=''
        )
        field.contribute_to_class(Book, "%s_file" % fmt)


add_file_fields()
670
671
class BookPopularity(models.Model):
    """Popularity counter of a single book.

    Book.update_popularity() stores here the number of distinct users
    found among the book's 'set' tags.
    """
    # Reachable from the book as `book.popularity`.
    book = models.OneToOneField(Book, related_name='popularity')
    count = models.IntegerField(default=0)