src/catalogue/helpers.py

   1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 from django.conf import settings
   5 from django.contrib.contenttypes.models import ContentType
   6 from django.core.cache import cache
   7
   8 from .models import Tag, Book
   9 from os.path import getmtime
  10 import pickle
  11 from collections import defaultdict
  12
  13
# Names of tag categories; presumably the categories that classify books
# (NOTE(review): confirm against the Tag model).
BOOK_CATEGORIES = ('author', 'epoch', 'genre', 'kind')

# Module-level cache of the unpickled counters file, filled in by
# get_top_level_related_tags(); None until the first load.
_COUNTERS = None
# Compared against the counters file's mtime to decide whether _COUNTERS has
# to be (re)loaded; starts at 0 so that the first call always loads the file.
_COUNTER_TIME = 0
  18
  19
  20 def get_top_level_related_tags(tags, categories=None):
  21     """
  22     Finds tags related to given tags through books, and counts their usage.
  23
  24     Takes ancestry into account: if a tag is applied to a book, its
  25     usage on the book's descendants is ignored.
  26     """
  27     global _COUNTERS, _COUNTER_TIME
  28     # First, check that we have a valid and recent version of the counters.
  29     if getmtime(settings.CATALOGUE_COUNTERS_FILE) > _COUNTER_TIME:
  30         for i in range(10):
  31             try:
  32                 with open(settings.CATALOGUE_COUNTERS_FILE, 'rb') as f:
  33                     _COUNTERS = pickle.load(f)
  34             except (EOFError, ValueError):
  35                 if i < 9:
  36                     continue
  37                 else:
  38                     raise
  39             else:
  40                 break
  41
  42     tagids = tuple(sorted(t.pk for t in tags))
  43     try:
  44         related_ids = _COUNTERS['next'][tagids]
  45     except KeyError:
  46         return
  47
  48     related = Tag.objects.filter(pk__in=related_ids)
  49
  50     if categories is not None:
  51         related = related.filter(category__in=categories)
  52
  53     for tag in related:
  54         tag.count = _COUNTERS['count'][tuple(sorted(tagids + (tag.pk,)))]
  55         yield tag
  56
  57
  58 def update_counters():
  59     def combinations(things):
  60         if len(things):
  61             for c in combinations(things[1:]):
  62                 yield c
  63                 yield (things[0],) + c
  64         else:
  65             yield ()
  66
  67     def count_for_book(book, count_by_combination=None, parent_combinations=None):
  68         if not parent_combinations:
  69             parent_combinations = set()
  70         tags = sorted(book.tags.filter(category__in=('author', 'genre', 'epoch', 'kind')).values_list('pk', flat=True))
  71         combs = list(combinations(tags))
  72         for c in combs:
  73             if c not in parent_combinations:
  74                 count_by_combination[c] += 1
  75         combs_for_child = set(list(parent_combinations) + combs)
  76         for child in book.children.all():
  77             count_for_book(child, count_by_combination, combs_for_child)
  78
  79     count_by_combination = defaultdict(lambda: 0)
  80     for b in Book.objects.filter(findable=True, parent=None):
  81         count_for_book(b, count_by_combination)
  82
  83     next_combinations = defaultdict(set)
  84     # Now build an index of all combinations.
  85     for c in count_by_combination.keys():
  86         if not c:
  87             continue
  88         for n in c:
  89             rest = tuple(x for x in c if x != n)
  90             next_combinations[rest].add(n)
  91
  92     counters = {
  93         "count": dict(count_by_combination),
  94         "next": dict(next_combinations),
  95     }
  96
  97     with open(settings.CATALOGUE_COUNTERS_FILE, 'wb') as f:
  98         pickle.dump(counters, f)
  99
 100
 101 def get_audiobook_tags():
 102     audiobook_tag_ids = cache.get('audiobook_tags')
 103     if audiobook_tag_ids is None:
 104         books_with_audiobook = Book.objects.filter(findable=True, media__type__in=('mp3', 'ogg'))\
 105             .distinct().values_list('pk', flat=True)
 106         audiobook_tag_ids = Tag.objects.filter(
 107             items__content_type=ContentType.objects.get_for_model(Book),
 108             items__object_id__in=list(books_with_audiobook)).distinct().values_list('pk', flat=True)
 109         audiobook_tag_ids = list(audiobook_tag_ids)
 110         cache.set('audiobook_tags', audiobook_tag_ids)
 111     return audiobook_tag_ids