# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from collections import defaultdict
from itertools import chain, combinations
from os.path import getmtime
import pickle

from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.core.cache import cache

from .models import Tag, Book


BOOK_CATEGORIES = ('author', 'epoch', 'genre', 'kind')

# How many times the counters file is read before a load error is re-raised.
_LOAD_ATTEMPTS = 10

# In-process copy of the counters file and the file mtime it was loaded at.
_COUNTERS = None
_COUNTER_TIME = 0


def _load_counters():
    """
    Unpickles and returns the contents of settings.CATALOGUE_COUNTERS_FILE.

    update_counters() rewrites that file in place, so a reader may see it
    empty or truncated. Such reads are retried (immediately, without
    a delay) up to _LOAD_ATTEMPTS times; the last failure is re-raised.
    """
    for attempt in range(_LOAD_ATTEMPTS):
        try:
            # NOTE: pickle is only acceptable here because the file is
            # written by update_counters(); never point the setting
            # at data coming from an untrusted source.
            with open(settings.CATALOGUE_COUNTERS_FILE, 'rb') as f:
                return pickle.load(f)
        except (EOFError, ValueError, pickle.UnpicklingError):
            # An empty file raises EOFError, a truncated one raises
            # UnpicklingError.
            if attempt == _LOAD_ATTEMPTS - 1:
                raise


def get_top_level_related_tags(tags, categories=None):
    """
    Finds tags related to given tags through books, and counts their usage.

    Takes ancestry into account: if a tag is applied to a book, its
    usage on the book's descendants is ignored.

    The numbers come from the file generated by update_counters(), which
    is reloaded whenever its modification time changes.

    This is a generator: it yields Tag objects, each with a `count`
    attribute set. If `categories` is not None, only tags of these
    categories are yielded. Nothing is yielded if the given combination
    of tags is not present in the counters.
    """
    global _COUNTERS, _COUNTER_TIME
    # First, check that we have a valid and recent version of the counters.
    mtime = getmtime(settings.CATALOGUE_COUNTERS_FILE)
    if _COUNTERS is None or mtime > _COUNTER_TIME:
        _COUNTERS = _load_counters()
        # Remember the version we have, so that the file is not unpickled
        # again on every call.
        _COUNTER_TIME = mtime

    # Keep a single snapshot for the whole (lazy) iteration, even if
    # the global is replaced in the meantime.
    counters = _COUNTERS

    tagids = tuple(sorted(t.pk for t in tags))
    try:
        related_ids = counters['next'][tagids]
    except KeyError:
        return

    related = Tag.objects.filter(pk__in=related_ids)

    if categories is not None:
        related = related.filter(category__in=categories)

    for tag in related:
        tag.count = counters['count'][tuple(sorted(tagids + (tag.pk,)))]
        yield tag


def _tag_combinations(tag_ids):
    """
    Returns an iterator over all subsets of `tag_ids`, as tuples.

    The empty tuple is included. Elements of each tuple keep the order
    they have in `tag_ids`.
    """
    return chain.from_iterable(
        combinations(tag_ids, size) for size in range(len(tag_ids) + 1))


def _count_for_book(book, count_by_combination, parent_combinations):
    """
    Adds the tag combinations of `book` and its descendants to the counts.

    A combination already present on any ancestor (`parent_combinations`)
    is not counted again for the book.
    """
    tags = sorted(
        book.tags.filter(category__in=BOOK_CATEGORIES)
        .values_list('pk', flat=True))
    combs = list(_tag_combinations(tags))
    for c in combs:
        if c not in parent_combinations:
            count_by_combination[c] += 1
    combs_for_child = parent_combinations | set(combs)
    for child in book.children.all():
        _count_for_book(child, count_by_combination, combs_for_child)


def update_counters():
    """
    Recalculates tag usage counters and saves them to a file.

    Walks all findable top-level books with their descendants, and
    pickles a dict with two keys to settings.CATALOGUE_COUNTERS_FILE:

    - "count": maps a sorted tuple of tag ids to the number of books
      counted for that combination,
    - "next": maps a sorted tuple of tag ids to the set of tag ids
      which can extend it to another counted combination.
    """
    count_by_combination = defaultdict(int)
    for b in Book.objects.filter(findable=True, parent=None):
        _count_for_book(b, count_by_combination, set())

    next_combinations = defaultdict(set)
    # Now build an index of all combinations.
    for c in count_by_combination:
        # The empty combination has no tag to take away.
        for n in c:
            rest = tuple(x for x in c if x != n)
            next_combinations[rest].add(n)

    counters = {
        "count": dict(count_by_combination),
        "next": dict(next_combinations),
    }

    # The file is overwritten in place; readers retry when they
    # happen to see it incomplete.
    with open(settings.CATALOGUE_COUNTERS_FILE, 'wb') as f:
        pickle.dump(counters, f)


def get_audiobook_tags():
    """
    Returns a list of ids of tags used on findable books with audio.

    A book is considered here if it has any media of type 'mp3' or 'ogg'.
    The list is kept in the cache under the 'audiobook_tags' key.
    """
    audiobook_tag_ids = cache.get('audiobook_tags')
    if audiobook_tag_ids is None:
        books_with_audiobook = Book.objects.filter(
            findable=True, media__type__in=('mp3', 'ogg')
        ).distinct().values_list('pk', flat=True)
        audiobook_tag_ids = Tag.objects.filter(
            items__content_type=ContentType.objects.get_for_model(Book),
            items__object_id__in=list(books_with_audiobook)
        ).distinct().values_list('pk', flat=True)
        audiobook_tag_ids = list(audiobook_tag_ids)
        cache.set('audiobook_tags', audiobook_tag_ids)
    return audiobook_tag_ids