97c63d08c06734054812a1b3c255db17415ec9e4
[wolnelektury.git] / src / catalogue / helpers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.contrib.contenttypes.models import ContentType
7 from django.core.cache import cache
8
9 from .models import Tag, Book
10 from os.path import getmtime
11 import cPickle
12 from collections import defaultdict
13
14
# Names of tag categories; the same four names are used by update_counters()
# when selecting which of a book's tags to count.
BOOK_CATEGORIES = ('author', 'epoch', 'genre', 'kind')

# In-process copy of the counters dictionary unpickled from
# settings.CATALOGUE_COUNTERS_FILE by get_top_level_related_tags().
# It is read there through its 'next' and 'count' keys.
_COUNTERS = None
# Compared against the counters file's modification time in
# get_top_level_related_tags() to decide whether the file must be re-read.
_COUNTER_TIME = None
19
20
def get_top_level_related_tags(tags, categories=None):
    """
    Finds tags related to given tags through books, and counts their usage.

    Takes ancestry into account: if a tag is applied to a book, its
    usage on the book's descendants is ignored.

    The counters are read from the pickle stored at
    settings.CATALOGUE_COUNTERS_FILE (written by update_counters) and kept
    in module-level state. The file is re-read only when its modification
    time is newer than the one recorded at the previous load.

    :param tags: iterable of objects exposing a `pk` attribute
    :param categories: optional collection of category names; when given,
        only related tags from these categories are yielded
    :return: generator of Tag objects, each with a `count` attribute set.
        Nothing is yielded when the given combination of tags is not
        present in the counters.
    """
    global _COUNTERS, _COUNTER_TIME
    # First, check that we have a valid and recent version of the counters.
    # The modification time is sampled *before* reading, so a file replaced
    # while we are loading it is simply picked up again on the next call.
    mtime = getmtime(settings.CATALOGUE_COUNTERS_FILE)
    if _COUNTERS is None or _COUNTER_TIME is None or mtime > _COUNTER_TIME:
        # NOTE: unpickling is only safe because this file is produced
        # locally by update_counters(); never point it at untrusted data.
        with open(settings.CATALOGUE_COUNTERS_FILE) as f:
            _COUNTERS = cPickle.load(f)
        # Remember when the loaded data was written. Without this the
        # comparison above is always true and the file is re-read on
        # every single call.
        _COUNTER_TIME = mtime

    # Counter keys are tuples of tag ids in ascending order.
    tagids = tuple(sorted(t.pk for t in tags))
    try:
        related_ids = _COUNTERS['next'][tagids]
    except KeyError:
        # No book carries this exact combination of tags.
        return

    related = Tag.objects.filter(pk__in=related_ids)

    if categories is not None:
        related = related.filter(category__in=categories)

    for tag in related:
        # Usage count of the current selection extended by this tag.
        tag.count = _COUNTERS['count'][tuple(sorted(tagids + (tag.pk,)))]
        yield tag
48
49
def update_counters():
    """
    Rebuilds the tag usage counters and pickles them to
    settings.CATALOGUE_COUNTERS_FILE.

    The stored dictionary has two entries:

    * "count": maps each tuple of tag ids (ascending) to the number of
      books carrying all of those tags, where a combination already
      counted on an ancestor book is not counted again on its descendants;
    * "next": maps each tuple of tag ids to the set of tag ids that can
      extend it to another counted combination.
    """
    def subsets(items):
        # Yields every sub-tuple of `items`, keeping the element order.
        if not len(items):
            yield ()
            return
        head, tail = items[0], items[1:]
        for subset in subsets(tail):
            yield subset
            yield (head,) + subset

    def tally(book, totals, inherited=None):
        # Counts the book's tag combinations, then recurses into children,
        # handing down every combination seen on the way from the root.
        inherited = inherited or set()
        tag_ids = list(
            book.tags.filter(category__in=('author', 'genre', 'epoch', 'kind')).values_list('pk', flat=True))
        tag_ids.sort()
        own = list(subsets(tag_ids))
        for combo in own:
            if combo in inherited:
                # Already accounted for on an ancestor.
                continue
            totals[combo] += 1
        handed_down = set(inherited)
        handed_down.update(own)
        for child in book.children.all():
            tally(child, totals, handed_down)

    totals = defaultdict(int)
    for root in Book.objects.filter(parent=None):
        tally(root, totals)

    # Now build an index of all combinations: for each combination, dropping
    # one tag gives a shorter combination that this tag can follow.
    # The empty combination has no tags to drop, so it adds nothing here.
    followers = defaultdict(set)
    for combo in totals:
        for tag_id in combo:
            shorter = tuple(other for other in combo if other != tag_id)
            followers[shorter].add(tag_id)

    # Plain dicts are stored, so the pickle does not carry default factories.
    counters = {
        "count": dict(totals),
        "next": dict(followers),
    }

    with open(settings.CATALOGUE_COUNTERS_FILE, 'w') as f:
        cPickle.dump(counters, f)
91
92
def get_audiobook_tags():
    """
    Returns a list of primary keys of tags attached to books that have
    'mp3' or 'ogg' media.

    The list is kept in the cache under the 'audiobook_tags' key and is
    computed from the database only when that key is missing.
    """
    cached_ids = cache.get('audiobook_tags')
    if cached_ids is not None:
        return cached_ids

    audio_books = Book.objects.filter(media__type__in=('mp3', 'ogg'))
    audio_book_ids = audio_books.distinct().values_list('pk', flat=True)
    book_content_type = ContentType.objects.get_for_model(Book)
    tag_query = Tag.objects.filter(
        items__content_type=book_content_type,
        items__object_id__in=list(audio_book_ids))
    tag_ids = list(tag_query.distinct().values_list('pk', flat=True))

    cache.set('audiobook_tags', tag_ids)
    return tag_ids