Minor fix in OAI-PMH.
[wolnelektury.git] / apps / catalogue / management / commands / checkcovers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from optparse import make_option
6 from django.contrib.sites.models import Site
7 from django.core.management.base import BaseCommand
8 from catalogue import app_settings
9 from django.utils.functional import lazy
10
11
def ancestor_has_cover(book):
    """Return True if any ancestor of ``book`` has a cover of its own.

    The ``parent`` chain is followed upwards starting from the direct
    parent; ``book`` itself is never inspected.  The walk stops at the
    first ancestor whose ``extra_info`` holds a truthy ``cover_url``.

    Args:
        book: object exposing ``parent`` (falsy at the top of the chain)
            and a dict-like ``extra_info``.

    Returns:
        True if some ancestor defines ``cover_url``, False otherwise.
    """
    while book.parent:
        book = book.parent
        if book.extra_info.get('cover_url'):
            return True
    return False
18
19
# Domain of the current Site.  Wrapped in ``lazy`` so that the
# ``Site.objects.get_current()`` lookup is deferred until the value is
# actually used instead of being performed when this module is imported.
current_domain = lazy(lambda: Site.objects.get_current().domain, str)()


def full_url(obj):
    """Return an absolute ``http://`` URL for ``obj`` on the current site.

    Args:
        obj: any object providing ``get_absolute_url()``; its result is
            appended directly to the current site's domain.

    Returns:
        The string ``'http://<domain><absolute url>'``.
    """
    return 'http://%s%s' % (current_domain, obj.get_absolute_url())
25
26
class Command(BaseCommand):
    """Management command reporting problems with book cover metadata.

    Every book is classified by the contents of its ``extra_info``:

    * no ``cover_url`` and no ancestor with one,
    * no ``cover_url`` but an ancestor has one,
    * ``cover_source`` not starting with ``app_settings.REDAKCJA_URL``,
    * license found in ``cover_by`` but not matching any of
      ``app_settings.GOOD_LICENSES``,
    * no license recognizable in ``cover_by``.

    A summary is always printed; per-book details follow unless
    ``-q``/``--quiet`` is given.
    """
    option_list = BaseCommand.option_list + (
        make_option(
            '-q', '--quiet', action='store_false', dest='verbose',
            default=True, help='Suppress output'),
    )
    help = 'Checks cover sources and licenses.'

    @staticmethod
    def _print_section_header(title):
        """Print a blank line, ``title`` and an ``=`` underline of equal length."""
        print("")
        print(title)
        print("=" * len(title))

    @staticmethod
    def _print_cover_details(book):
        """Print a blank line, the book's URL and its raw cover metadata."""
        print("")
        print(full_url(book))
        print(book.extra_info.get('cover_by'))
        print(book.extra_info.get('cover_source'))
        print(book.extra_info.get('cover_url'))

    def handle(self, **options):
        """Scan all books and print the cover report.

        Args:
            **options: parsed command-line options; only ``verbose``
                is read here.
        """
        from collections import defaultdict
        import re
        from django.db import transaction
        from catalogue.models import Book

        verbose = options['verbose']

        without_cover = []
        with_ancestral_cover = []
        not_redakcja = []
        bad_license = defaultdict(list)  # license string -> list of books
        no_license = []

        # Captures the part of ``cover_by`` that follows a comma (plus
        # optional whitespace) and starts with "CC".  The leading ``.*`` is
        # greedy, so the last such occurrence is the one captured.
        re_license = re.compile(r'.*,\s*(CC.*)')

        redakcja_url = app_settings.REDAKCJA_URL
        # Each GOOD_LICENSES entry is used as a regex alternative.  It is
        # applied with ``match``, i.e. anchored at the start of the license
        # string only.
        good_license = re.compile("(%s)" % ")|(".join(
            app_settings.GOOD_LICENSES))

        with transaction.commit_on_success():
            for book in Book.objects.all().order_by('slug').iterator():
                extra_info = book.extra_info
                if not extra_info.get('cover_url'):
                    if ancestor_has_cover(book):
                        with_ancestral_cover.append(book)
                    else:
                        without_cover.append(book)
                    continue

                # ``or ''`` also covers keys stored with an explicit None,
                # which would otherwise raise instead of being reported.
                cover_source = extra_info.get('cover_source') or ''
                if not cover_source.startswith(redakcja_url):
                    not_redakcja.append(book)

                match = re_license.match(extra_info.get('cover_by') or '')
                if match is None:
                    no_license.append(book)
                elif not good_license.match(match.group(1)):
                    bad_license[match.group(1)].append(book)

        summary = (
            "%d books with no covers, %d with inherited covers.\n"
            "Bad licenses used: %s (%d covers without license).\n"
            "%d covers not from %s.\n"
        )
        print(summary % (
            len(without_cover),
            len(with_ancestral_cover),
            ", ".join(sorted(bad_license.keys())) or "none",
            len(no_license),
            len(not_redakcja),
            redakcja_url,
        ))

        if not verbose:
            return

        if bad_license:
            self._print_section_header("Bad license:")
            # Sorted so the listing is deterministic and follows the same
            # order as the summary line above.
            for lic in sorted(bad_license):
                print("")
                print(lic)
                for book in bad_license[lic]:
                    print(full_url(book))

        if no_license:
            self._print_section_header("No license:")
            for book in no_license:
                self._print_cover_details(book)

        if not_redakcja:
            self._print_section_header("Not from Redakcja or source missing:")
            for book in not_redakcja:
                self._print_cover_details(book)

        if without_cover:
            self._print_section_header("No cover:")
            for book in without_cover:
                print(full_url(book))

        if with_ancestral_cover:
            self._print_section_header("With ancestral cover:")
            for book in with_ancestral_cover:
                print(full_url(book))