Generally working version.
[wolnelektury.git] / src / catalogue / helpers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.conf import settings
6 from django.contrib.contenttypes.models import ContentType
7 from django.db.models import Count
8 from .models import Tag, Book
9 from os.path import getmtime
10 import cPickle
11 from collections import defaultdict
12
13
14
# Tag category names.  The same four categories are the ones whose tags
# update_counters() takes into account when counting books.
BOOK_CATEGORIES = ('author', 'epoch', 'genre', 'kind')


# In-process cache of the unpickled contents of
# settings.CATALOGUE_COUNTERS_FILE; filled by get_top_level_related_tags().
_COUNTERS = None
# Compared against the modification time of the counters file to decide
# whether _COUNTERS has to be (re)loaded.
_COUNTER_TIME = None
def get_top_level_related_tags(tags, categories=None):
    """
    Finds tags related to given tags through books, and counts their usage.

    Takes ancestry into account: if a tag is applied to a book, its
    usage on the book's descendants is ignored.

    This is a generator.  The counters are read from the pickle file
    settings.CATALOGUE_COUNTERS_FILE (the one written by update_counters())
    and kept in the module-level _COUNTERS cache; the file is read again
    only when its modification time differs from the one recorded at the
    previous load.

    Arguments:
        tags -- iterable of Tag objects; only their ``pk`` is used.
        categories -- optional iterable of category names; when given,
            only related tags belonging to these categories are yielded.

    Yields Tag objects, each with a ``count`` attribute set to the stored
    counter of the combination "given tags + this tag".  Yields nothing
    when the given combination of tags is not present in the counters.

    Raises OSError/IOError if the counters file is missing or unreadable.
    """
    global _COUNTERS, _COUNTER_TIME
    # First, check that we have a valid and recent version of the counters.
    counters_path = settings.CATALOGUE_COUNTERS_FILE
    mtime = getmtime(counters_path)
    if _COUNTERS is None or mtime != _COUNTER_TIME:
        # The file is a pickle and must only ever come from our own
        # update_counters(); never point this setting at untrusted data.
        with open(counters_path) as f:
            _COUNTERS = cPickle.load(f)
        # Remember which version of the file is cached, so that it is not
        # unpickled again on every call.  The mtime was sampled before the
        # read, so a concurrent rewrite can only cause one extra reload.
        _COUNTER_TIME = mtime

    # Work on one snapshot of the counters for the whole iteration, even if
    # another call reloads the module-level cache between two yields.
    counters = _COUNTERS

    tagids = tuple(sorted(t.pk for t in tags))
    try:
        related_ids = counters['next'][tagids]
    except KeyError:
        # This combination of tags was never counted: nothing is related.
        return

    related = Tag.objects.filter(pk__in=related_ids)

    # TODO: do we really need that?
    if categories is not None:
        related = related.filter(category__in=categories)

    counts = counters['count']
    for tag in related:
        # Keys of the "count" index are sorted tuples of tag ids.
        tag.count = counts[tuple(sorted(tagids + (tag.pk,)))]
        yield tag
50
51
def update_counters():
    """
    Recomputes tag usage counters for all books and saves them as a pickle
    in settings.CATALOGUE_COUNTERS_FILE.

    Starting from every top-level book (``parent=None``) and descending
    recursively through ``children``, each combination of the book's
    author/genre/epoch/kind tag ids is counted once for that book, unless
    the same combination already occurred on one of its ancestors.

    The pickled object is a dict with two keys:

    * "count" -- maps a sorted tuple of tag ids to the number of times
      that combination was counted;
    * "next" -- maps a tuple of tag ids to the set of tag ids which extend
      it to another counted combination.
    """
    # Imported here so that the function does not rely on names missing
    # from the module's import block.
    from itertools import chain, combinations

    def tag_combinations(tag_ids):
        """
        Returns a list of all sub-tuples of tag_ids (including the empty
        one), each keeping the order of tag_ids.  There are 2**len(tag_ids)
        of them.
        """
        return list(chain.from_iterable(
            combinations(tag_ids, size)
            for size in range(len(tag_ids) + 1)))

    def count_for_book(book, count_by_combination,
                       parent_combinations=frozenset()):
        """
        Adds the tag combinations of book and of its descendants to
        count_by_combination, skipping the combinations listed in
        parent_combinations (those already seen on ancestors).
        """
        # Keep this list of categories in sync with BOOK_CATEGORIES.
        tags = sorted(
            t.pk for t in book.tags.filter(
                category__in=('author', 'genre', 'epoch', 'kind')))
        combs = tag_combinations(tags)
        for c in combs:
            if c not in parent_combinations:
                count_by_combination[c] += 1
        # Children must ignore everything seen on this book or above it.
        combs_for_child = parent_combinations.union(combs)
        for child in book.children.all():
            count_for_book(child, count_by_combination, combs_for_child)

    count_by_combination = defaultdict(int)
    for b in Book.objects.filter(parent=None):
        count_for_book(b, count_by_combination)

    next_combinations = defaultdict(set)
    # Now build an index of all combinations: for each counted combination,
    # register every one of its tags as a possible "next" tag of the
    # combination with that tag left out.
    for c in count_by_combination:
        for n in c:
            rest = tuple(x for x in c if x != n)
            next_combinations[rest].add(n)

    # Plain dicts are stored, so that unpickling does not depend on the
    # defaultdict factories.
    counters = {
        "count": dict(count_by_combination),
        "next": dict(next_combinations),
    }

    # NOTE(review): the file is truncated and rewritten in place, so a
    # process which reloads it in the middle of the write may read an
    # incomplete pickle -- consider writing to a temporary file and
    # renaming it over the target.
    with open(settings.CATALOGUE_COUNTERS_FILE, 'w') as f:
        cPickle.dump(counters, f)