e342c90d401d42380587754437a4a8f99c2d4dce
[redakcja.git] / src / catalogue / management / commands / insert_isbn.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 import csv
7
8 import sys
9 from django.contrib.auth.models import User
10 from lxml import etree
11 from collections import defaultdict
12 from django.core.management import BaseCommand
13
14 from catalogue.models import Book
15 from librarian import RDFNS, DCNS
16
# MIME type recorded in the dcterms:format entry for each supported format.
# Keys are the format names that appear in the CSV input rows.
CONTENT_TYPES = dict(
    pdf='application/pdf',
    epub='application/epub+zip',
    mobi='application/x-mobipocket-ebook',
    txt='text/plain',
    html='text/html',
)
24
25
# XML snippets appended, in this order, for every (format, isbn) pair.
# Each is %-formatted with the keys: format, url, isbn, content_type.
# The first element declares the format's URL; the following <meta> elements
# refer back to it (and to each other) through the ids built from `format`.
ISBN_TEMPLATES = (
    (
        r'<dc:relation.hasFormat id="%(format)s" xmlns:dc="http://purl.org/dc/elements/1.1/">'
        r'%(url)s</dc:relation.hasFormat>'
    ),
    r'<meta refines="#%(format)s" id="%(format)s-id" property="dcterms:identifier">ISBN-%(isbn)s</meta>',
    r'<meta refines="#%(format)s-id" property="identifier-type">ISBN</meta>',
    r'<meta refines="#%(format)s" property="dcterms:format">%(content_type)s</meta>',
)
33
34
def url_for_format(slug, format):
    """Return the wolnelektury.pl URL for the book `slug` in the given `format`.

    Every format except 'html' maps to a file under /media/book/<format>/;
    'html' maps to the book's page under /katalog/lektura/.
    """
    if format != 'html':
        params = {'slug': slug, 'format': format}
        return 'http://wolnelektury.pl/media/book/%(format)s/%(slug)s.%(format)s' % params
    return 'https://wolnelektury.pl/katalog/lektura/%s.html' % slug
40
41
class Command(BaseCommand):
    """Management command that inserts ISBN metadata into book sources.

    Input is a CSV file whose rows are ``slug,format,isbn``.  For every slug
    the matching ``Book`` is looked up by ``dc_slug``, the source of its first
    chunk is parsed, and one group of ISBN_TEMPLATES elements per
    (format, isbn) pair is appended to the ``rdf:Description`` node.  The
    modified document is then committed as the user given by ``--username``.
    Books that already contain a ``dc:relation.hasFormat`` element are skipped.
    """
    args = 'csv_file'

    def add_arguments(self, parser):
        """Register the positional CSV path and the ``--username`` option."""
        # Arguments have to be added to the parser that Django passes in;
        # the command object itself has no add_argument() method.
        parser.add_argument(
            'csv_file',
            help='CSV file with rows of the form: slug,format,isbn')
        parser.add_argument(
            '-u', '--username', dest='username', metavar='USER',
            help='Assign commits to this user (required, preferably yourself).')

    @staticmethod
    def _read_isbn_lists(csv_path):
        """Return a dict mapping slug -> list of (format, isbn) from the CSV."""
        isbn_lists = defaultdict(list)
        # Text mode with newline='' is what the csv module expects on
        # Python 3; the context manager closes the file even if a row is bad.
        with open(csv_path, newline='', encoding='utf-8') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                if not row:
                    # csv.reader yields [] for blank lines; ignore them.
                    continue
                slug, fmt, isbn = row
                isbn_lists[slug].append((fmt, isbn))
        return isbn_lists

    def handle(self, csv_file, **options):
        """Insert ISBN metadata for every book listed in `csv_file`.

        Exits with status 1 when no username was supplied.  Lookup errors
        (unknown user, unknown book slug, unknown format) propagate as
        exceptions and stop the run.
        """
        username = options.get('username')

        if username:
            user = User.objects.get(username=username)
        else:
            print('Please provide a username.')
            sys.exit(1)

        isbn_lists = self._read_isbn_lists(csv_file)

        for slug, isbn_list in isbn_lists.items():
            print('processing %s' % slug)
            book = Book.objects.get(dc_slug=slug)
            chunk = book.chunk_set.first()
            old_head = chunk.head
            src = old_head.materialize()
            tree = etree.fromstring(src)

            # An existing dc:relation.hasFormat means ISBN data was already
            # inserted; do not add it a second time.
            isbn_node = tree.find('.//' + DCNS("relation.hasFormat"))
            if isbn_node is not None:
                print('%s already contains ISBN metadata, skipping' % slug)
                continue

            desc = tree.find(".//" + RDFNS("Description"))
            if desc is None:
                # Nowhere to attach the new elements; skip instead of
                # crashing on None.append().
                print('%s has no rdf:Description element, skipping' % slug)
                continue

            for fmt, isbn in isbn_list:
                # Computed once per pair; every template picks the keys it needs.
                params = {
                    'format': fmt,
                    'isbn': isbn,
                    'content_type': CONTENT_TYPES[fmt],
                    'url': url_for_format(slug, fmt),
                }
                for template in ISBN_TEMPLATES:
                    element = etree.XML(template % params)
                    element.tail = '\n'
                    desc.append(element)

            new_head = chunk.commit(
                etree.tostring(tree, encoding='unicode'),
                author=user,
                description='automatyczne dodanie isbn'
            )
            print('committed %s' % slug)
            # Carry the publishable flag of the previous head over to the
            # newly committed one.
            if old_head.publishable:
                new_head.set_publishable(True)
            else:
                print('Warning: %s not publishable' % slug)