src/catalogue/helpers.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 from django.conf import settings
   6 from django.contrib.contenttypes.models import ContentType
   7 from django.core.cache import cache
   8
   9 from .models import Tag, Book
  10 from os.path import getmtime
  11 import cPickle
  12 from collections import defaultdict
  13
  14
# Tag category names; these are the same four categories whose tags
# update_counters() takes into account when counting.
BOOK_CATEGORIES = ('author', 'epoch', 'genre', 'kind')

# In-process copy of the counters unpickled from
# settings.CATALOGUE_COUNTERS_FILE by get_top_level_related_tags().
_COUNTERS = None
# Compared with the counters file's modification time by
# get_top_level_related_tags() to decide whether to (re)load _COUNTERS.
_COUNTER_TIME = None
  19
  20
  21 def get_top_level_related_tags(tags, categories=None):
  22     """
  23     Finds tags related to given tags through books, and counts their usage.
  24
  25     Takes ancestry into account: if a tag is applied to a book, its
  26     usage on the book's descendants is ignored.
  27     """
  28     global _COUNTERS, _COUNTER_TIME
  29     # First, check that we have a valid and recent version of the counters.
  30     if getmtime(settings.CATALOGUE_COUNTERS_FILE) > _COUNTER_TIME:
  31         for i in xrange(10):
  32             try:
  33                 with open(settings.CATALOGUE_COUNTERS_FILE) as f:
  34                     _COUNTERS = cPickle.load(f)
  35             except (EOFError, ValueError):
  36                 if i < 9:
  37                     continue
  38                 else:
  39                     raise
  40             else:
  41                 break
  42
  43     tagids = tuple(sorted(t.pk for t in tags))
  44     try:
  45         related_ids = _COUNTERS['next'][tagids]
  46     except KeyError:
  47         return
  48
  49     related = Tag.objects.filter(pk__in=related_ids)
  50
  51     if categories is not None:
  52         related = related.filter(category__in=categories)
  53
  54     for tag in related:
  55         tag.count = _COUNTERS['count'][tuple(sorted(tagids + (tag.pk,)))]
  56         yield tag
  57
  58
  59 def update_counters():
  60     def combinations(things):
  61         if len(things):
  62             for c in combinations(things[1:]):
  63                 yield c
  64                 yield (things[0],) + c
  65         else:
  66             yield ()
  67
  68     def count_for_book(book, count_by_combination=None, parent_combinations=None):
  69         if not parent_combinations:
  70             parent_combinations = set()
  71         tags = sorted(book.tags.filter(category__in=('author', 'genre', 'epoch', 'kind')).values_list('pk', flat=True))
  72         combs = list(combinations(tags))
  73         for c in combs:
  74             if c not in parent_combinations:
  75                 count_by_combination[c] += 1
  76         combs_for_child = set(list(parent_combinations) + combs)
  77         for child in book.children.all():
  78             count_for_book(child, count_by_combination, combs_for_child)
  79
  80     count_by_combination = defaultdict(lambda: 0)
  81     for b in Book.objects.filter(parent=None):
  82         count_for_book(b, count_by_combination)
  83
  84     next_combinations = defaultdict(set)
  85     # Now build an index of all combinations.
  86     for c in count_by_combination.keys():
  87         if not c:
  88             continue
  89         for n in c:
  90             rest = tuple(x for x in c if x != n)
  91             next_combinations[rest].add(n)
  92
  93     counters = {
  94         "count": dict(count_by_combination),
  95         "next": dict(next_combinations),
  96     }
  97
  98     with open(settings.CATALOGUE_COUNTERS_FILE, 'w') as f:
  99         cPickle.dump(counters, f)
 100
 101
 102 def get_audiobook_tags():
 103     audiobook_tag_ids = cache.get('audiobook_tags')
 104     if audiobook_tag_ids is None:
 105         books_with_audiobook = Book.objects.filter(media__type__in=('mp3', 'ogg'))\
 106             .distinct().values_list('pk', flat=True)
 107         audiobook_tag_ids = Tag.objects.filter(
 108             items__content_type=ContentType.objects.get_for_model(Book),
 109             items__object_id__in=list(books_with_audiobook)).distinct().values_list('pk', flat=True)
 110         audiobook_tag_ids = list(audiobook_tag_ids)
 111         cache.set('audiobook_tags', audiobook_tag_ids)
 112     return audiobook_tag_ids