src/documents/management/commands/insert_isbn.py

   1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
   2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   3 #
   4 import csv
   5
   6 import sys
   7 from django.contrib.auth.models import User
   8 from lxml import etree
   9 from collections import defaultdict
  10 from django.core.management import BaseCommand
  11
  12 from documents.models import Book
  13 from librarian import RDFNS, DCNS
  14
  15 CONTENT_TYPES = {
  16     'pdf':  'application/pdf',
  17     'epub': 'application/epub+zip',
  18     'mobi': 'application/x-mobipocket-ebook',
  19     'txt':  'text/plain',
  20     'html': 'text/html',
  21 }
  22
  23
  24 ISBN_TEMPLATES = (
  25     r'<dc:relation.hasFormat id="%(format)s" xmlns:dc="http://purl.org/dc/elements/1.1/">%(url)s'
  26     r'</dc:relation.hasFormat>',
  27     r'<meta refines="#%(format)s" id="%(format)s-id" property="dcterms:identifier">ISBN-%(isbn)s</meta>',
  28     r'<meta refines="#%(format)s-id" property="identifier-type">ISBN</meta>',
  29     r'<meta refines="#%(format)s" property="dcterms:format">%(content_type)s</meta>',
  30 )
  31
  32
  33 def url_for_format(slug, format):
  34     if format == 'html':
  35         return 'https://wolnelektury.pl/katalog/lektura/%s.html' % slug
  36     else:
  37         return 'http://wolnelektury.pl/media/book/%(format)s/%(slug)s.%(format)s' % {'slug': slug, 'format': format}
  38
  39
  40 class Command(BaseCommand):
  41     args = 'csv_file'
  42
  43     def add_arguments(self, parser):
  44         self.add_argument(
  45             '-u', '--username', dest='username', metavar='USER',
  46             help='Assign commits to this user (required, preferably yourself).')
  47
  48     def handle(self, csv_file, **options):
  49         username = options.get('username')
  50
  51         if username:
  52             user = User.objects.get(username=username)
  53         else:
  54             print('Please provide a username.')
  55             sys.exit(1)
  56
  57         csvfile = open(csv_file, 'rb')
  58         isbn_lists = defaultdict(list)
  59         for slug, format, isbn in csv.reader(csvfile, delimiter=','):
  60             isbn_lists[slug].append((format, isbn))
  61         csvfile.close()
  62
  63         for slug, isbn_list in isbn_lists.iteritems():
  64             print('processing %s' % slug)
  65             book = Book.objects.get(catalogue_book_id=slug)
  66             chunk = book.chunk_set.first()
  67             old_head = chunk.head
  68             src = old_head.materialize()
  69             tree = etree.fromstring(src)
  70             isbn_node = tree.find('.//' + DCNS("relation.hasFormat"))
  71             if isbn_node is not None:
  72                 print('%s already contains ISBN metadata, skipping' % slug)
  73                 continue
  74             desc = tree.find(".//" + RDFNS("Description"))
  75             for format, isbn in isbn_list:
  76                 for template in ISBN_TEMPLATES:
  77                     isbn_xml = template % {
  78                         'format': format,
  79                         'isbn': isbn,
  80                         'content_type': CONTENT_TYPES[format],
  81                         'url': url_for_format(slug, format),
  82                     }
  83                     element = etree.XML(isbn_xml)
  84                     element.tail = '\n'
  85                     desc.append(element)
  86             new_head = chunk.commit(
  87                 etree.tostring(tree, encoding='unicode'),
  88                 author=user,
  89                 description='automatyczne dodanie isbn'
  90             )
  91             print('committed %s' % slug)
  92             if old_head.publishable:
  93                 new_head.set_publishable(True)
  94             else:
  95                 print('Warning: %s not publishable' % slug)