To get from Yokohama to San Francisco, keep going east.
[wolnelektury.git] / src / catalogue / management / commands / checkcovers.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from django.contrib.sites.models import Site
5 from django.core.management.base import BaseCommand
6 from django.utils.functional import lazy
7 from catalogue import app_settings
8
9
def ancestor_has_cover(book):
    """Tell whether any ancestor of ``book`` has a cover of its own.

    Follows the ``parent`` links upwards, starting from the direct parent
    (the book itself is never inspected), and returns ``True`` as soon as
    an ancestor's extra-info JSON holds a truthy ``cover_url``.  Returns
    ``False`` when the chain ends without finding one.
    """
    ancestor = book.parent
    while ancestor:
        if ancestor.get_extra_info_json().get('cover_url'):
            return True
        ancestor = ancestor.parent
    return False
16
17
def _current_site_domain():
    """Look up the domain of the currently configured Site."""
    return Site.objects.get_current().domain


# Lazy string proxy: the Site lookup is deferred until the value is actually
# used (e.g. formatted into a URL), so importing this module does not need
# database access.
current_domain = lazy(_current_site_domain, str)()
19
20
def full_url(obj):
    """Build an absolute ``http://`` URL for ``obj`` on the current site.

    Joins the (lazily resolved) current site domain with whatever
    ``obj.get_absolute_url()`` returns.
    """
    path = obj.get_absolute_url()
    return 'http://%s%s' % (current_domain, path)
25
26
class Command(BaseCommand):
    """Report books whose cover metadata is missing or questionable.

    Each book is classified from its extra-info JSON:

    * no ``cover_url`` and no ancestor with one,
    * no ``cover_url`` but some ancestor has one,
    * ``cover_source`` not starting with ``app_settings.REDAKCJA_URL``,
    * ``cover_by`` without a recognisable ``, CC...`` license suffix,
    * a license suffix not matching ``app_settings.GOOD_LICENSES``.

    A summary is always written; the per-book listings are written as well
    unless ``-q/--quiet`` is given.  All output goes through ``self.stdout``
    so it can be captured or redirected by the caller.
    """
    help = 'Checks cover sources and licenses.'

    def add_arguments(self, parser):
        """Register ``-q/--quiet``, which stores ``False`` in ``verbose``."""
        parser.add_argument(
                '-q', '--quiet', action='store_false', dest='verbose',
                default=True, help='Suppress output')

    def _emit(self, text=''):
        """Write ``text`` plus exactly one newline to ``self.stdout``.

        The newline is appended explicitly and ``ending=''`` is passed so
        the result matches ``print``: text that already ends with a newline
        still gets one more, and ``None`` is written as ``'None'``.
        """
        self.stdout.write('%s\n' % (text,), ending='')

    def _emit_heading(self, title):
        """Write a blank line, ``title`` and an ``=`` underline of equal length."""
        self._emit()
        self._emit(title)
        self._emit('=' * len(title))

    def _emit_url_section(self, title, books):
        """Write a headed list of book URLs; nothing at all if ``books`` is empty."""
        if not books:
            return
        self._emit_heading(title)
        for book in books:
            self._emit(full_url(book))

    def _emit_detail_section(self, title, books):
        """Write a headed list of books with their raw cover metadata.

        For every book: a blank line, its URL, then the ``cover_by``,
        ``cover_source`` and ``cover_url`` values (``None`` when absent).
        Nothing is written if ``books`` is empty.
        """
        if not books:
            return
        self._emit_heading(title)
        for book in books:
            self._emit()
            self._emit(full_url(book))
            extra_info = book.get_extra_info_json()
            self._emit(extra_info.get('cover_by'))
            self._emit(extra_info.get('cover_source'))
            self._emit(extra_info.get('cover_url'))

    def handle(self, **options):
        """Classify all books by cover status and write the report.

        Reads ``options['verbose']`` (``False`` when ``-q`` was passed) to
        decide whether the per-book listings follow the summary.
        """
        from collections import defaultdict
        import re
        from django.db import transaction
        from catalogue.models import Book

        verbose = options['verbose']

        without_cover = []
        with_ancestral_cover = []
        not_redakcja = []
        bad_license = defaultdict(list)
        no_license = []

        # The license is taken to be the "CC..." text following a comma in
        # the cover credit; the leading ``.*`` is greedy, so the last such
        # comma wins.
        re_license = re.compile(r'.*,\s*(CC.*)')

        redakcja_url = app_settings.REDAKCJA_URL
        # GOOD_LICENSES entries are joined as regex alternatives; ``match``
        # below anchors them at the start of the license text only.
        good_license = re.compile("(%s)" % ")|(".join(
                            app_settings.GOOD_LICENSES))

        with transaction.atomic():
            for book in Book.objects.all().order_by('slug').iterator():
                extra_info = book.get_extra_info_json()
                if not extra_info.get('cover_url'):
                    if ancestor_has_cover(book):
                        with_ancestral_cover.append(book)
                    else:
                        without_cover.append(book)
                    continue

                # ``or ''`` covers keys that are present but null, which a
                # ``.get(key, '')`` default would let through as ``None``.
                cover_source = extra_info.get('cover_source') or ''
                if not cover_source.startswith(redakcja_url):
                    not_redakcja.append(book)

                match = re_license.match(extra_info.get('cover_by') or '')
                if match is None:
                    no_license.append(book)
                elif not good_license.match(match.group(1)):
                    bad_license[match.group(1)].append(book)

        self._emit("""%d books with no covers, %d with inherited covers.
Bad licenses used: %s (%d covers without license).
%d covers not from %s.
""" % (
            len(without_cover),
            len(with_ancestral_cover),
            ", ".join(sorted(bad_license.keys())) or "none",
            len(no_license),
            len(not_redakcja),
            redakcja_url,
            ))

        if not verbose:
            return

        if bad_license:
            self._emit_heading("Bad license:")
            for lic, books in bad_license.items():
                self._emit()
                self._emit(lic)
                for book in books:
                    self._emit(full_url(book))

        self._emit_detail_section("No license:", no_license)
        self._emit_detail_section(
            "Not from Redakcja or source missing:", not_redakcja)
        self._emit_url_section("No cover:", without_cover)
        self._emit_url_section("With ancestral cover:", with_ancestral_cover)
132                     print(full_url(book))