skip books with isbn metadata
[redakcja.git] / apps / catalogue / management / commands / insert_isbn.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import csv
7
8 import sys
9 from django.contrib.auth.models import User
10 from lxml import etree
11 from optparse import make_option
12
13 from collections import defaultdict
14 from django.core.management import BaseCommand
15
16 from catalogue.models import Book
17 from librarian import RDFNS, DCNS
18
# Maps each supported book format code (as it appears in the CSV input) to the
# MIME type that is substituted into the ``dcterms:format`` metadata element.
CONTENT_TYPES = {
    'pdf': 'application/pdf',
    'epub': 'application/epub+zip',
    'mobi': 'application/x-mobipocket-ebook',
    'txt': 'text/plain',
    'html': 'text/html',
}
26
27
# XML snippets appended, in this order, for every (format, isbn) pair.
# Each one is filled in with %-formatting using the keys ``format``, ``url``,
# ``isbn`` and ``content_type`` and then parsed into an element.
ISBN_TEMPLATES = (
    # The format itself: a dc:relation.hasFormat element holding the file URL.
    r'<dc:relation.hasFormat id="%(format)s" '
    r'xmlns:dc="http://purl.org/dc/elements/1.1/">'
    r'%(url)s</dc:relation.hasFormat>',
    # The ISBN identifier, refining the hasFormat element above.
    r'<meta refines="#%(format)s" id="%(format)s-id" property="dcterms:identifier">ISBN-%(isbn)s</meta>',
    # Declares that the identifier above is of type ISBN.
    r'<meta refines="#%(format)s-id" property="identifier-type">ISBN</meta>',
    # The MIME type of the format.
    r'<meta refines="#%(format)s" property="dcterms:format">%(content_type)s</meta>',
)
35
36
def url_for_format(slug, format):
    """Return the wolnelektury.pl URL of the book ``slug`` in ``format``.

    The ``'html'`` format points at the catalogue reader page; every other
    format points at the media file named ``<slug>.<format>`` in the
    per-format media directory.
    """
    if format != 'html':
        media_parts = {'slug': slug, 'format': format}
        return 'http://wolnelektury.pl/media/book/%(format)s/%(slug)s.%(format)s' % media_parts
    return 'https://wolnelektury.pl/katalog/lektura/%s.html' % slug
42
43
class Command(BaseCommand):
    """Insert ISBN metadata into the source XML of catalogued books.

    Takes a CSV file whose rows are ``slug,format,isbn``. For every book slug
    found there, the first chunk of the book is loaded, one group of
    ``ISBN_TEMPLATES`` elements per (format, isbn) pair is appended to its
    ``rdf:Description`` element, and the result is committed as the user
    given with ``--username``.
    """
    option_list = BaseCommand.option_list + (
        # make_option('-q', '--quiet', action='store_false', dest='verbose',
        #     default=True, help='Less output'),
        # make_option('-d', '--dry-run', action='store_true', dest='dry_run',
        #     default=False, help="Don't actually touch anything"),
        make_option(
            '-u', '--username', dest='username', metavar='USER',
            help='Assign commits to this user (required, preferably yourself).'),
    )
    args = 'csv_file'

    @staticmethod
    def _read_isbn_lists(csv_file):
        """Read the CSV at path ``csv_file`` into {slug: [(format, isbn), ...]}.

        Blank lines are ignored. Any other row that does not have exactly
        three columns raises ``ValueError``.
        """
        isbn_lists = defaultdict(list)
        # Binary mode is what the Python 2 csv module asks for; the context
        # manager closes the file even if a malformed row raises.
        with open(csv_file, 'rb') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                if not row:
                    # csv.reader yields an empty list for a blank line.
                    continue
                slug, book_format, isbn = row
                isbn_lists[slug].append((book_format, isbn))
        return isbn_lists

    def handle(self, csv_file, **options):
        """Run the command.

        ``csv_file`` is the path of the CSV input; ``options['username']``
        names the user the commits are attributed to. Exits with status 1
        when no username is given. Books that already contain a
        ``dc:relation.hasFormat`` element, or that have no
        ``rdf:Description`` element to extend, are reported and skipped.
        """
        username = options.get('username')
        if not username:
            print('Please provide a username.')
            sys.exit(1)
        user = User.objects.get(username=username)

        isbn_lists = self._read_isbn_lists(csv_file)

        for slug, isbn_list in isbn_lists.iteritems():
            print('processing %s' % slug)
            book = Book.objects.get(dc_slug=slug)
            chunk = book.chunk_set.first()
            old_head = chunk.head
            tree = etree.fromstring(old_head.materialize())

            # An existing hasFormat element means a previous run (or a human)
            # already added ISBN metadata; do not add it twice.
            if tree.find('.//' + DCNS("relation.hasFormat")) is not None:
                print('%s already contains ISBN metadata, skipping' % slug)
                continue

            desc = tree.find(".//" + RDFNS("Description"))
            if desc is None:
                # Nothing to append the metadata to; report instead of
                # crashing on None.append() below.
                print('Warning: %s has no rdf:Description element, skipping' % slug)
                continue

            for book_format, isbn in isbn_list:
                # The substitution values are the same for all templates of
                # one (format, isbn) pair, so build them once.
                params = {
                    'format': book_format,
                    'isbn': isbn,
                    'content_type': CONTENT_TYPES[book_format],
                    'url': url_for_format(slug, book_format),
                }
                for template in ISBN_TEMPLATES:
                    element = etree.XML(template % params)
                    # Keep each inserted element on its own line.
                    element.tail = '\n'
                    desc.append(element)

            new_head = chunk.commit(
                etree.tostring(tree, encoding=unicode),
                author=user,
                description='automatyczne dodanie isbn'
            )
            print('committed %s' % slug)
            # Carry the publishable flag over from the previous head.
            if old_head.publishable:
                new_head.set_publishable(True)
            else:
                print('Warning: %s not publishable' % slug)