option to turn off ssify just for api + some optimizations
[wolnelektury.git] / src / catalogue / helpers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from .models import Tag, Book
7 from os.path import getmtime
8 import cPickle
9 from collections import defaultdict
10
11
# Names of tag categories; update_counters() below counts book tags from
# these same four categories.
BOOK_CATEGORIES = ('author', 'epoch', 'genre', 'kind')

# Module-level cache of the object unpickled from
# settings.CATALOGUE_COUNTERS_FILE (filled in lazily by
# get_top_level_related_tags()); None until the first load.
_COUNTERS = None
# Modification-time threshold for the cache: get_top_level_related_tags()
# re-reads the counters file when the file's mtime is greater than this value.
_COUNTER_TIME = None
16
17
def get_top_level_related_tags(tags, categories=None):
    """
    Finds tags related to given tags through books, and counts their usage.

    Takes ancestry into account: if a tag is applied to a book, its
    usage on the book's descendants is ignored.

    The counters are read from the pickle stored at
    ``settings.CATALOGUE_COUNTERS_FILE`` and cached at module level; the file
    is re-read only when its modification time is newer than the one
    recorded at the previous load.

    This is a generator, so nothing (including the cache check) runs until
    it is iterated.

    :param tags: iterable of objects exposing a ``pk`` attribute.
    :param categories: optional iterable of category names; when given,
        only related tags whose ``category`` is among them are yielded.
    :returns: yields ``Tag`` instances, each with a ``count`` attribute set
        to the stored counter for the given tags plus that tag.  Yields
        nothing when the combination of ``tags`` is not in the counters.
    """
    global _COUNTERS, _COUNTER_TIME
    # First, check that we have a valid and recent version of the counters.
    mtime = getmtime(settings.CATALOGUE_COUNTERS_FILE)
    if _COUNTERS is None or _COUNTER_TIME is None or mtime > _COUNTER_TIME:
        with open(settings.CATALOGUE_COUNTERS_FILE) as f:
            _COUNTERS = cPickle.load(f)
        # Remember which version of the file is cached; without this the
        # condition above stays true forever and every call re-reads the
        # whole pickle.  The mtime was sampled *before* reading, so a file
        # rewritten in the meantime is still picked up on the next call.
        _COUNTER_TIME = mtime

    # Keys in the counters are tuples of tag ids sorted ascending.
    tagids = tuple(sorted(t.pk for t in tags))
    try:
        related_ids = _COUNTERS['next'][tagids]
    except KeyError:
        # Unknown combination: no related tags.
        return

    related = Tag.objects.filter(pk__in=related_ids)

    # TODO: do we really need that?
    if categories is not None:
        related = related.filter(category__in=categories)

    # Bind the table once so the counts stay consistent with `related_ids`
    # even if the module-level cache is replaced while we are being iterated.
    counts = _COUNTERS['count']
    for tag in related:
        tag.count = counts[tuple(sorted(tagids + (tag.pk,)))]
        yield tag
46
47
def update_counters():
    """
    Recomputes the tag-combination counters and stores them as a pickle
    in ``settings.CATALOGUE_COUNTERS_FILE``.

    The pickled object is a dict with two keys:

    * ``"count"`` -- maps a tuple of tag ids (sorted ascending, possibly
      empty) to the number of books counted for that combination.  A book
      is not counted for a combination already present on one of its
      ancestors.
    * ``"next"`` -- maps a tuple of tag ids to the set of tag ids that can
      be added to it to obtain another combination present in ``"count"``.

    Only tags from the categories listed in ``BOOK_CATEGORIES`` are
    considered.
    """
    def combinations(things):
        """Yields every sub-tuple of `things` (order kept), including ()."""
        if things:
            for c in combinations(things[1:]):
                yield c
                yield (things[0],) + c
        else:
            yield ()

    def count_for_book(book, count_by_combination, parent_combinations=None):
        """
        Adds `book` and, recursively, its children to the counters.

        `parent_combinations` holds the combinations already seen on the
        book's ancestors; those are not counted again for this book.
        """
        if not parent_combinations:
            parent_combinations = set()
        # Sorted ids, so that generated combinations are sorted tuples.
        tags = sorted(
            book.tags.filter(category__in=BOOK_CATEGORIES)
            .values_list('pk', flat=True))
        combs = list(combinations(tags))
        for c in combs:
            if c not in parent_combinations:
                count_by_combination[c] += 1
        # Children must skip everything seen on this book or above it.
        combs_for_child = parent_combinations | set(combs)
        for child in book.children.all():
            count_for_book(child, count_by_combination, combs_for_child)

    count_by_combination = defaultdict(int)
    # Start from top-level books; descendants are reached recursively.
    for b in Book.objects.filter(parent=None):
        count_for_book(b, count_by_combination)

    next_combinations = defaultdict(set)
    # Now build an index of all combinations.
    for c in count_by_combination:
        if not c:
            continue
        # Each tag of a combination is a possible "next" tag for the
        # combination made of the remaining ones.
        for n in c:
            rest = tuple(x for x in c if x != n)
            next_combinations[rest].add(n)

    counters = {
        "count": dict(count_by_combination),
        "next": dict(next_combinations),
    }

    # Pickle streams are binary data, so the file is opened in binary mode.
    with open(settings.CATALOGUE_COUNTERS_FILE, 'wb') as f:
        cPickle.dump(counters, f)