option to turn off ssify just for api + some optimizations
[wolnelektury.git] / src / catalogue / management / commands / checkcovers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from optparse import make_option
6 from django.contrib.sites.models import Site
7 from django.core.management.base import BaseCommand
8 from catalogue import app_settings
9 from django.utils.functional import lazy
10
11
def ancestor_has_cover(book):
    """Tell whether any ancestor of ``book`` has a cover URL.

    Walks up the ``parent`` chain, starting from the book's parent (the
    book itself is never inspected), and stops at the first ancestor whose
    ``extra_info`` holds a truthy ``'cover_url'``.

    Returns True if such an ancestor exists, False otherwise.
    """
    ancestor = book.parent
    while ancestor:
        if ancestor.extra_info.get('cover_url'):
            return True
        ancestor = ancestor.parent
    return False
18
19
def _current_site_domain():
    """Return the domain of the current ``Site``."""
    return Site.objects.get_current().domain


# Lazy proxy: the Site lookup runs when the value is used (e.g. formatted
# into a string), not when this module is imported.
current_domain = lazy(_current_site_domain, str)()
21
22
def full_url(obj):
    """Return the absolute ``http://`` URL of ``obj`` on the current site.

    ``obj`` must provide ``get_absolute_url()``; its result is appended
    directly after the current site's domain.
    """
    path = obj.get_absolute_url()
    return 'http://%s%s' % (current_domain, path)
27
28
29 class Command(BaseCommand):
30     option_list = BaseCommand.option_list + (
31         make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
32                     help='Suppress output'),
33     )
34     help = 'Checks cover sources and licenses.'
35
36     def handle(self, **options):
37         from collections import defaultdict
38         import re
39         from django.db import transaction
40         from catalogue.models import Book
41
42         verbose = options['verbose']
43
44         without_cover = []
45         with_ancestral_cover = []
46         not_redakcja = []
47         bad_license = defaultdict(list)
48         no_license = []
49
50         re_license = re.compile(ur'.*,\s*(CC.*)')
51
52         redakcja_url = app_settings.REDAKCJA_URL
53         good_license = re.compile("(%s)" % ")|(".join(
54                             app_settings.GOOD_LICENSES))
55
56         with transaction.atomic():
57             for book in Book.objects.all().order_by('slug').iterator():
58                 extra_info = book.extra_info
59                 if not extra_info.get('cover_url'):
60                     if ancestor_has_cover(book):
61                         with_ancestral_cover.append(book)
62                     else:
63                         without_cover.append(book)
64                 else:
65                     if not extra_info.get('cover_source', '').startswith(redakcja_url):
66                         not_redakcja.append(book)
67                     match = re_license.match(extra_info.get('cover_by', ''))
68                     if match:
69                         if not good_license.match(match.group(1)):
70                             bad_license[match.group(1)].append(book)
71                     else:
72                         no_license.append(book)
73
74         print """%d books with no covers, %d with inherited covers.
75 Bad licenses used: %s (%d covers without license).
76 %d covers not from %s.
77 """ % (
78             len(without_cover),
79             len(with_ancestral_cover),
80             ", ".join(sorted(bad_license.keys())) or "none",
81             len(no_license),
82             len(not_redakcja),
83             redakcja_url,
84             )
85
86         if verbose:
87             if bad_license:
88                 print
89                 print "Bad license:"
90                 print "============"
91                 for lic, books in bad_license.items():
92                     print
93                     print lic
94                     for book in books:
95                         print full_url(book)
96
97             if no_license:
98                 print
99                 print "No license:"
100                 print "==========="
101                 for book in no_license:
102                     print
103                     print full_url(book)
104                     print book.extra_info.get('cover_by')
105                     print book.extra_info.get('cover_source')
106                     print book.extra_info.get('cover_url')
107
108             if not_redakcja:
109                 print
110                 print "Not from Redakcja or source missing:"
111                 print "===================================="
112                 for book in not_redakcja:
113                     print
114                     print full_url(book)
115                     print book.extra_info.get('cover_by')
116                     print book.extra_info.get('cover_source')
117                     print book.extra_info.get('cover_url')
118
119             if without_cover:
120                 print
121                 print "No cover:"
122                 print "========="
123                 for book in without_cover:
124                     print full_url(book)
125
126             if with_ancestral_cover:
127                 print
128                 print "With ancestral cover:"
129                 print "====================="
130                 for book in with_ancestral_cover:
131                     print full_url(book)