src/catalogue/management/commands/insert_isbn.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
   4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   5 #
   6 import csv
   7
   8 import sys
   9 from django.contrib.auth.models import User
  10 from lxml import etree
  11 from collections import defaultdict
  12 from django.core.management import BaseCommand
  13
  14 from catalogue.models import Book
  15 from librarian import RDFNS, DCNS
  16
  17 CONTENT_TYPES = {
  18     'pdf':  'application/pdf',
  19     'epub': 'application/epub+zip',
  20     'mobi': 'application/x-mobipocket-ebook',
  21     'txt':  'text/plain',
  22     'html': 'text/html',
  23 }
  24
  25
  26 ISBN_TEMPLATES = (
  27     r'<dc:relation.hasFormat id="%(format)s" xmlns:dc="http://purl.org/dc/elements/1.1/">%(url)s'
  28     r'</dc:relation.hasFormat>',
  29     r'<meta refines="#%(format)s" id="%(format)s-id" property="dcterms:identifier">ISBN-%(isbn)s</meta>',
  30     r'<meta refines="#%(format)s-id" property="identifier-type">ISBN</meta>',
  31     r'<meta refines="#%(format)s" property="dcterms:format">%(content_type)s</meta>',
  32 )
  33
  34
  35 def url_for_format(slug, format):
  36     if format == 'html':
  37         return 'https://wolnelektury.pl/katalog/lektura/%s.html' % slug
  38     else:
  39         return 'http://wolnelektury.pl/media/book/%(format)s/%(slug)s.%(format)s' % {'slug': slug, 'format': format}
  40
  41
  42 class Command(BaseCommand):
  43     args = 'csv_file'
  44
  45     def add_arguments(self, parser):
  46         self.add_argument(
  47             '-u', '--username', dest='username', metavar='USER',
  48             help='Assign commits to this user (required, preferably yourself).')
  49
  50     def handle(self, csv_file, **options):
  51         username = options.get('username')
  52
  53         if username:
  54             user = User.objects.get(username=username)
  55         else:
  56             print('Please provide a username.')
  57             sys.exit(1)
  58
  59         csvfile = open(csv_file, 'rb')
  60         isbn_lists = defaultdict(list)
  61         for slug, format, isbn in csv.reader(csvfile, delimiter=','):
  62             isbn_lists[slug].append((format, isbn))
  63         csvfile.close()
  64
  65         for slug, isbn_list in isbn_lists.iteritems():
  66             print('processing %s' % slug)
  67             book = Book.objects.get(dc_slug=slug)
  68             chunk = book.chunk_set.first()
  69             old_head = chunk.head
  70             src = old_head.materialize()
  71             tree = etree.fromstring(src)
  72             isbn_node = tree.find('.//' + DCNS("relation.hasFormat"))
  73             if isbn_node is not None:
  74                 print('%s already contains ISBN metadata, skipping' % slug)
  75                 continue
  76             desc = tree.find(".//" + RDFNS("Description"))
  77             for format, isbn in isbn_list:
  78                 for template in ISBN_TEMPLATES:
  79                     isbn_xml = template % {
  80                         'format': format,
  81                         'isbn': isbn,
  82                         'content_type': CONTENT_TYPES[format],
  83                         'url': url_for_format(slug, format),
  84                     }
  85                     element = etree.XML(isbn_xml)
  86                     element.tail = '\n'
  87                     desc.append(element)
  88             new_head = chunk.commit(
  89                 etree.tostring(tree, encoding='unicode'),
  90                 author=user,
  91                 description='automatyczne dodanie isbn'
  92             )
  93             print('committed %s' % slug)
  94             if old_head.publishable:
  95                 new_head.set_publishable(True)
  96             else:
  97                 print('Warning: %s not publishable' % slug)