cover attribution in book text and checking script
[wolnelektury.git] / apps / catalogue / management / commands / checkcovers.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from optparse import make_option
6 from django.contrib.sites.models import Site
7 from django.core.management.base import BaseCommand
8
9
def ancestor_has_cover(book):
    """Tell whether any ancestor of ``book`` declares a cover.

    The ``parent`` chain is followed upwards starting from the direct
    parent; ``book`` itself is never inspected.  Returns True as soon as an
    ancestor with a truthy ``'cover_url'`` entry in its ``extra_info`` is
    found, and False once the chain ends without one.
    """
    ancestor = book.parent
    while ancestor:
        if ancestor.extra_info.get('cover_url'):
            return True
        ancestor = ancestor.parent
    return False
16
17
# Evaluated once, when this module is first imported; every URL built by
# full_url() below reuses this value.
current_domain = Site.objects.get_current().domain


def full_url(obj):
    """Return an absolute ``http://`` URL for ``obj``.

    The URL is ``current_domain`` followed by whatever
    ``obj.get_absolute_url()`` returns.
    """
    path = obj.get_absolute_url()
    return 'http://%s%s' % (current_domain, path)
23
24
class Command(BaseCommand):
    """Report where book covers come from and how they are licensed.

    Every book is put into one of three groups: it declares a cover itself
    (truthy ``cover_url`` in ``extra_info``), it has none but an ancestor
    does, or no cover is available at all.  Books with their own cover are
    additionally grouped by Flickr author (parsed from ``cover_source``) and
    by CC license (parsed from ``cover_by``).

    A summary is always printed; the per-book listings follow unless
    ``-q``/``--quiet`` is given.
    """

    option_list = BaseCommand.option_list + (
        make_option('-q', '--quiet', action='store_false', dest='verbose', default=True,
            help='Suppress output'),
    )
    help = 'Checks cover sources and licenses.'

    @staticmethod
    def _print_heading(title):
        """Print a blank line, then ``title`` underlined with ``=`` signs."""
        print("")
        print(title)
        print("=" * len(title))

    @staticmethod
    def _print_cover_details(book):
        """Print a blank line, the book URL and its raw cover metadata."""
        print("")
        print(full_url(book))
        print(book.extra_info.get('cover_by'))
        print(book.extra_info.get('cover_source'))
        print(book.extra_info.get('cover_url'))

    def handle(self, **options):
        """Scan all books and print the cover report.

        ``options['verbose']`` (cleared by ``-q``) decides whether the
        detailed listings are printed after the summary; it is treated as
        True when the key is absent.
        """
        # NOTE: print(...) is always given exactly one argument, so it emits
        # the same text whether parsed as a Python 2 statement or as the
        # print function.
        from collections import defaultdict
        import re
        from django.db import transaction
        from catalogue.models import Book

        verbose = options.get('verbose', True)

        without_cover = []
        with_ancestral_cover = []
        by_flickr_author = defaultdict(list)
        not_flickr = []
        by_license = defaultdict(list)
        no_license = []

        # The dots are escaped so that only the literal host "flickr.com"
        # (optionally prefixed with "www.") is accepted; an unescaped "."
        # would also accept look-alike hosts such as "flickrXcom".
        re_flickr = re.compile(r'https?://(?:www\.)?flickr\.com/photos/([^/]*)/.*')
        # The license is whatever follows the last ", " and starts with "CC".
        re_license = re.compile(r'.*,\s*(CC.*)')

        with transaction.commit_on_success():
            for book in Book.objects.all().order_by('slug').iterator():
                extra_info = book.extra_info
                if not extra_info.get('cover_url'):
                    if ancestor_has_cover(book):
                        with_ancestral_cover.append(book)
                    else:
                        without_cover.append(book)
                    continue

                # "or ''" also covers keys that are present but hold None,
                # which re.match() would reject with a TypeError.
                match = re_flickr.match(extra_info.get('cover_source') or '')
                if match:
                    by_flickr_author[match.group(1)].append(book)
                else:
                    not_flickr.append(book)

                match = re_license.match(extra_info.get('cover_by') or '')
                if match:
                    by_license[match.group(1)].append(book)
                else:
                    no_license.append(book)

        print("""%d books with no covers, %d with ancestral covers.
Licenses used: %s (%d covers without license).
Flickr authors: %s (%d covers not from flickr).
""" % (
            len(without_cover),
            len(with_ancestral_cover),
            ", ".join(sorted(by_license)),
            len(no_license),
            ", ".join(sorted(by_flickr_author)),
            len(not_flickr),
            ))

        if not verbose:
            return

        # Groups are listed in sorted key order, matching the summary above
        # and keeping the report stable between runs.
        self._print_heading("By license:")
        for lic, books in sorted(by_license.items()):
            print("")
            print(lic)
            for book in books:
                print(full_url(book))

        self._print_heading("No license:")
        for book in no_license:
            self._print_cover_details(book)

        self._print_heading("By Flickr author:")
        for author, books in sorted(by_flickr_author.items()):
            print("")
            print("author: http://flickr.com/photos/%s/" % author)
            for book in books:
                print(full_url(book))

        self._print_heading("Not from Flickr or source missing:")
        for book in not_flickr:
            self._print_cover_details(book)

        self._print_heading("No cover:")
        for book in without_cover:
            print(full_url(book))

        self._print_heading("With ancestral cover:")
        for book in with_ancestral_cover:
            print(full_url(book))