Wikidata in catalogue.
[redakcja.git] / src / documents / management / commands / insert_isbn.py
1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 import csv
5
6 import sys
7 from django.contrib.auth.models import User
8 from lxml import etree
9 from collections import defaultdict
10 from django.core.management import BaseCommand
11
12 from documents.models import Book
13 from librarian import RDFNS, DCNS
14
# MIME content type emitted in the metadata for each supported file format,
# keyed by the format name used in the input CSV.
CONTENT_TYPES = dict(
    pdf='application/pdf',
    epub='application/epub+zip',
    mobi='application/x-mobipocket-ebook',
    txt='text/plain',
    html='text/html',
)
22
23
# XML snippets added for every (format, isbn) pair. Each one is filled in with
# the keys `format`, `isbn`, `content_type` and `url` via %-formatting.
# Every entry is wrapped in its own parentheses so that adjacent string
# literals are visibly one template rather than separate tuple items.
ISBN_TEMPLATES = (
    # <dc:relation.hasFormat> element carrying the URL; its id is the format.
    (
        '<dc:relation.hasFormat id="%(format)s"'
        ' xmlns:dc="http://purl.org/dc/elements/1.1/">'
        '%(url)s'
        '</dc:relation.hasFormat>'
    ),
    # <meta> refining the element above with the ISBN identifier.
    (
        '<meta refines="#%(format)s" id="%(format)s-id"'
        ' property="dcterms:identifier">ISBN-%(isbn)s</meta>'
    ),
    # <meta> refining the identifier, marking its type as ISBN.
    (
        '<meta refines="#%(format)s-id"'
        ' property="identifier-type">ISBN</meta>'
    ),
    # <meta> refining the hasFormat element with the content type.
    (
        '<meta refines="#%(format)s"'
        ' property="dcterms:format">%(content_type)s</meta>'
    ),
)
31
32
def url_for_format(slug, format):
    """Return the wolnelektury.pl URL of the book `slug` in the given format.

    The 'html' format maps to the catalogue reading page of the book; any
    other format maps to a media file whose directory and extension are both
    the format name.
    """
    if format == 'html':
        return 'https://wolnelektury.pl/katalog/lektura/%s.html' % slug

    params = dict(slug=slug, format=format)
    return 'http://wolnelektury.pl/media/book/%(format)s/%(slug)s.%(format)s' % params
38
39
class Command(BaseCommand):
    """Insert ISBN metadata into book sources, driven by a CSV file.

    The CSV file holds ``slug,format,isbn`` rows. For every slug, the XML of
    the book's first chunk is parsed, the ``ISBN_TEMPLATES`` elements for each
    (format, isbn) pair are appended to its ``Description`` element (RDF
    namespace) and the result is committed on behalf of the given user.
    Books that already contain a ``relation.hasFormat`` element (DC
    namespace) are left untouched.
    """

    # Legacy usage hint; the positional argument itself is declared in
    # add_arguments().
    args = 'csv_file'

    def add_arguments(self, parser):
        """Register the CSV path positional and the ``--username`` option."""
        # Arguments have to be registered on the parser object passed in;
        # the command instance itself has no add_argument() method.
        parser.add_argument(
            'csv_file', metavar='csv_file',
            help='CSV file with slug,format,isbn rows.')
        parser.add_argument(
            '-u', '--username', dest='username', metavar='USER',
            help='Assign commits to this user (required, preferably yourself).')

    @staticmethod
    def _read_isbn_lists(csv_path):
        """Return a mapping of slug -> list of (format, isbn) read from the CSV."""
        isbn_lists = defaultdict(list)
        # csv.reader needs a text-mode file opened with newline=''; the
        # context manager closes the file even if a row is malformed.
        with open(csv_path, newline='', encoding='utf-8') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                if not row:
                    # Blank lines (e.g. a trailing newline) carry no data.
                    continue
                slug, fmt, isbn = row
                isbn_lists[slug].append((fmt, isbn))
        return isbn_lists

    def handle(self, csv_file, **options):
        """Add ISBN metadata to every book listed in `csv_file`.

        Args:
            csv_file: path of the CSV file with ``slug,format,isbn`` rows.
            **options: parsed command options; ``username`` selects the
                author of the commits.

        Exits with status 1 when no username is given. The user and each
        book are fetched with ``.get()``, so a failed lookup propagates to
        the caller, as does a ``KeyError`` for a format that is missing from
        ``CONTENT_TYPES``.
        """
        username = options.get('username')
        if not username:
            print('Please provide a username.')
            sys.exit(1)
        user = User.objects.get(username=username)

        isbn_lists = self._read_isbn_lists(csv_file)

        for slug, isbn_list in isbn_lists.items():
            print('processing %s' % slug)
            book = Book.objects.get(dc_slug=slug)
            chunk = book.chunk_set.first()
            old_head = chunk.head
            src = old_head.materialize()
            tree = etree.fromstring(src)

            # An existing hasFormat element means this book was already
            # processed; do not add the metadata a second time.
            isbn_node = tree.find('.//' + DCNS("relation.hasFormat"))
            if isbn_node is not None:
                print('%s already contains ISBN metadata, skipping' % slug)
                continue

            desc = tree.find(".//" + RDFNS("Description"))
            if desc is None:
                # Nowhere to attach the metadata; skip instead of failing
                # on None.append() and aborting the remaining books.
                print('%s has no rdf:Description element, skipping' % slug)
                continue

            for fmt, isbn in isbn_list:
                values = {
                    'format': fmt,
                    'isbn': isbn,
                    'content_type': CONTENT_TYPES[fmt],
                    'url': url_for_format(slug, fmt),
                }
                for template in ISBN_TEMPLATES:
                    element = etree.XML(template % values)
                    # Keep one element per line in the serialized document.
                    element.tail = '\n'
                    desc.append(element)

            new_head = chunk.commit(
                etree.tostring(tree, encoding='unicode'),
                author=user,
                description='automatyczne dodanie isbn'
            )
            print('committed %s' % slug)
            # Carry the publishable flag over from the previous head.
            if old_head.publishable:
                new_head.set_publishable(True)
            else:
                print('Warning: %s not publishable' % slug)