Merge branch 'master' of ssh://git.nowoczesnapolska.org.pl:2223/~/repo/wolnelektury
[wolnelektury.git] / apps / catalogue / management / commands / checkcovers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from optparse import make_option
6 from django.contrib.sites.models import Site
7 from django.core.management.base import BaseCommand
8 from catalogue import app_settings
9
10
def ancestor_has_cover(book):
    """Tell whether any ancestor of ``book`` declares a cover.

    Follows the ``parent`` chain upwards, starting with the book's parent
    (the book itself is never inspected). Returns True as soon as an
    ancestor has a truthy ``cover_url`` entry in its ``extra_info``, and
    False when the chain ends without one.
    """
    ancestor = book.parent
    while ancestor:
        if ancestor.extra_info.get('cover_url'):
            return True
        ancestor = ancestor.parent
    return False
17
18
# Domain of the current Site; looked up once, when this module is imported.
current_domain = Site.objects.get_current().domain


def full_url(obj):
    """Return the ``http://`` URL of ``obj`` on the current site's domain.

    ``obj`` must provide ``get_absolute_url()``; its result is appended
    directly after the module-level ``current_domain``.
    """
    path = obj.get_absolute_url()
    return 'http://%s%s' % (current_domain, path)
24
25
class Command(BaseCommand):
    """Audit the cover metadata stored in every book's ``extra_info``.

    Books are sorted into five groups: no cover at all, cover inherited
    from an ancestor, cover whose source does not start with
    ``app_settings.REDAKCJA_URL``, cover with a license that matches none
    of ``app_settings.GOOD_LICENSES``, and cover with no recognisable
    license. A summary is always printed; unless ``-q``/``--quiet`` is
    given, the books of every non-empty group are listed as well.
    """
    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Suppress output'),
    )
    help = 'Checks cover sources and licenses.'

    @staticmethod
    def _print_heading(title, underline):
        """Print an empty line, then a section title and its underline."""
        print("")
        print(title)
        print(underline)

    @staticmethod
    def _print_urls(books):
        """Print the full URL of each book, one per line."""
        for book in books:
            print(full_url(book))

    @staticmethod
    def _print_cover_details(books):
        """Print each book's URL followed by its raw cover metadata."""
        for book in books:
            print("")
            print(full_url(book))
            print(book.extra_info.get('cover_by'))
            print(book.extra_info.get('cover_source'))
            print(book.extra_info.get('cover_url'))

    def handle(self, **options):
        """Scan all books, classify their covers and print the report.

        Reads ``options['verbose']``: when false, only the summary is
        printed. Returns nothing.
        """
        from collections import defaultdict
        import re
        from django.db import transaction
        from catalogue.models import Book

        verbose = options['verbose']

        without_cover = []
        with_ancestral_cover = []
        not_redakcja = []
        bad_license = defaultdict(list)
        no_license = []

        # The license is the "CC..." text that follows a comma in cover_by.
        re_license = re.compile(r'.*,\s*(CC.*)')

        redakcja_url = app_settings.REDAKCJA_URL
        # One alternation built from all accepted license patterns.
        good_license = re.compile(
            "(%s)" % ")|(".join(app_settings.GOOD_LICENSES))

        with transaction.commit_on_success():
            for book in Book.objects.all().order_by('slug').iterator():
                extra_info = book.extra_info
                if not extra_info.get('cover_url'):
                    if ancestor_has_cover(book):
                        with_ancestral_cover.append(book)
                    else:
                        without_cover.append(book)
                    continue

                # `or ''` also covers keys that are present but hold None,
                # which would otherwise crash startswith()/match().
                cover_source = extra_info.get('cover_source') or ''
                if not cover_source.startswith(redakcja_url):
                    not_redakcja.append(book)

                match = re_license.match(extra_info.get('cover_by') or '')
                if match is None:
                    no_license.append(book)
                elif not good_license.match(match.group(1)):
                    bad_license[match.group(1)].append(book)

        print("""%d books with no covers, %d with inherited covers.
Bad licenses used: %s (%d covers without license).
%d covers not from %s.
""" % (
            len(without_cover),
            len(with_ancestral_cover),
            ", ".join(sorted(bad_license)) or "none",
            len(no_license),
            len(not_redakcja),
            redakcja_url,
            ))

        if not verbose:
            return

        if bad_license:
            self._print_heading("Bad license:", "============")
            # Sorted so the listing follows the same order as the summary.
            for lic in sorted(bad_license):
                print("")
                print(lic)
                self._print_urls(bad_license[lic])

        if no_license:
            self._print_heading("No license:", "===========")
            self._print_cover_details(no_license)

        if not_redakcja:
            self._print_heading(
                "Not from Redakcja or source missing:",
                "====================================")
            self._print_cover_details(not_redakcja)

        if without_cover:
            self._print_heading("No cover:", "=========")
            self._print_urls(without_cover)

        if with_ancestral_cover:
            self._print_heading(
                "With ancestral cover:", "=====================")
            self._print_urls(with_ancestral_cover)