X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/6280673f35e13e75e81c5b7821bd2a44a1831eab..357027375ff8867f42ca34bcbfb5a78b5b185fc3:/src/catalogue/management/commands/checkcovers.py diff --git a/src/catalogue/management/commands/checkcovers.py b/src/catalogue/management/commands/checkcovers.py new file mode 100644 index 000000000..7535dd474 --- /dev/null +++ b/src/catalogue/management/commands/checkcovers.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +from optparse import make_option +from django.contrib.sites.models import Site +from django.core.management.base import BaseCommand +from catalogue import app_settings +from django.utils.functional import lazy + + +def ancestor_has_cover(book): + while book.parent: + book = book.parent + if book.extra_info.get('cover_url'): + return True + return False + + +current_domain = lazy(lambda: Site.objects.get_current().domain, str)() +def full_url(obj): + return 'http://%s%s' % ( + current_domain, + obj.get_absolute_url()) + + +class Command(BaseCommand): + option_list = BaseCommand.option_list + ( + make_option('-q', '--quiet', action='store_false', dest='verbose', default=True, + help='Suppress output'), + ) + help = 'Checks cover sources and licenses.' + + def handle(self, **options): + from collections import defaultdict + import re + from django.db import transaction + from catalogue.models import Book + + verbose = options['verbose'] + + without_cover = [] + with_ancestral_cover = [] + not_redakcja = [] + bad_license = defaultdict(list) + no_license = [] + + re_license = re.compile(ur'.*,\s*(CC.*)') + + redakcja_url = app_settings.REDAKCJA_URL + good_license = re.compile("(%s)" % ")|(".join( + app_settings.GOOD_LICENSES)) + + with transaction.commit_on_success(): + for book in Book.objects.all().order_by('slug').iterator(): + extra_info = book.extra_info + if not extra_info.get('cover_url'): + if ancestor_has_cover(book): + with_ancestral_cover.append(book) + else: + without_cover.append(book) + else: + if not extra_info.get('cover_source', '' + ).startswith(redakcja_url): + not_redakcja.append(book) + match = re_license.match(extra_info.get('cover_by', '')) + if match: + if not good_license.match(match.group(1)): + bad_license[match.group(1)].append(book) + else: + no_license.append(book) + + print """%d books with no covers, %d with inherited covers. +Bad licenses used: %s (%d covers without license). +%d covers not from %s. +""" % ( + len(without_cover), + len(with_ancestral_cover), + ", ".join(sorted(bad_license.keys())) or "none", + len(no_license), + len(not_redakcja), + redakcja_url, + ) + + if verbose: + if bad_license: + print + print "Bad license:" + print "============" + for lic, books in bad_license.items(): + print + print lic + for book in books: + print full_url(book) + + if no_license: + print + print "No license:" + print "===========" + for book in no_license: + print + print full_url(book) + print book.extra_info.get('cover_by') + print book.extra_info.get('cover_source') + print book.extra_info.get('cover_url') + + if not_redakcja: + print + print "Not from Redakcja or source missing:" + print "====================================" + for book in not_redakcja: + print + print full_url(book) + print book.extra_info.get('cover_by') + print book.extra_info.get('cover_source') + print book.extra_info.get('cover_url') + + if without_cover: + print + print "No cover:" + print "=========" + for book in without_cover: + print full_url(book) + + if with_ancestral_cover: + print + print "With ancestral cover:" + print "=====================" + for book in with_ancestral_cover: + print full_url(book)