Remove binary leftovers from migrations.
[wolnelektury.git] / src / isbn / management / commands / import_onix.py
1 # -*- coding: utf-8 -*-
from datetime import date

from django.core.management.base import BaseCommand, CommandError
from lxml import etree

from isbn.models import ISBNPool, ONIXRecord
from librarian import XMLNamespace
8
# Namespace helper for the ONIX 3.0 "reference" schema. Calling it with a tag
# name produces the qualified name used for element lookups in
# get_descendants().
ONIXNS = XMLNamespace('http://ns.editeur.org/onix/3.0/reference')

# Maps ONIXRecord field names to the ONIX tag whose text is copied into that
# field as-is during import; a tag that is absent is stored as ''.
DIRECT_FIELDS = {
    'product_form': 'ProductForm',
    'product_form_detail': 'ProductFormDetail',
    'title': 'TitleText',
    'part_number': 'PartNumber',
    'edition_type': 'EditionType',
    'edition_number': 'EditionNumber',
    'language': 'LanguageCode',
    'imprint': 'ImprintName',
}

# Placeholder contributor name (Polish for "Author unknown"). A contributor
# carrying exactly this name is imported without a name and flagged as
# unnamed instead (see Command.parse_contributor).
UNKNOWN = u'Autor nieznany'
23
24
def parse_date(date_str):
    """Convert an ONIX ``YYYYMMDD`` date string into a ``datetime.date``.

    Only the first eight characters are interpreted, so timestamps that
    append a time part to the date (for example ``20180102T1530``) are
    accepted and the time part is ignored.

    :param date_str: string starting with an eight-digit ``YYYYMMDD`` date.
    :returns: the corresponding ``datetime.date``.
    :raises ValueError: if the string is shorter than eight characters, the
        date part is not numeric, or it does not name a valid calendar day.
    """
    if len(date_str) < 8:
        raise ValueError('expected a YYYYMMDD date, got: %r' % (date_str,))
    year = int(date_str[:4])
    month = int(date_str[4:6])
    # Stop at character 8: anything after the day (e.g. "T1530") is a time
    # part and must not be fed to int().
    day = int(date_str[6:8])
    return date(year, month, day)
30
31
def get_descendants(element, tags):
    """Find all descendants of *element* reachable through the *tags* path.

    *tags* is either a single tag name or a sequence of tag names describing
    a parent/child chain. Each name is qualified with the ONIX namespace
    before the lookup. Returns whatever ``element.findall`` yields for the
    resulting path, searched at any depth below *element*.
    """
    tag_path = [tags] if isinstance(tags, str) else tags
    qualified_names = [ONIXNS(name) for name in tag_path]
    xpath = './/' + '/'.join(qualified_names)
    return element.findall(xpath)
36
37
def get_field(element, tags, allow_multiple=False):
    """Return the text of the first descendant of *element* matching *tags*.

    :param element: element (or tree) to search in.
    :param tags: a tag name or a sequence of tag names forming a path, as
        accepted by get_descendants().
    :param allow_multiple: when false (the default), finding more than one
        matching element is treated as an error; when true, the first match
        is used and the rest are ignored.
    :returns: the ``text`` of the first matching element, or None if nothing
        matched.
    :raises AssertionError: if several elements match and *allow_multiple*
        is false.
    """
    sub_elements = get_descendants(element, tags)
    if not allow_multiple and len(sub_elements) > 1:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with -O. ``(tags,)`` keeps the message
        # formatting correct even when *tags* is itself a tuple.
        raise AssertionError('multiple elements: %s' % (tags,))
    return sub_elements[0].text if sub_elements else None
43
44
class Command(BaseCommand):
    """Management command that imports ONIX product records from an XML file.

    Each ``Product`` element in the file becomes one ONIXRecord, attached to
    the ISBNPool whose prefix matches the beginning of the product's ISBN.
    """
    help = "Import data from ONIX."

    def add_arguments(self, parser):
        """Declare the positional ``filename`` argument (the ONIX file)."""
        parser.add_argument('filename')

    def handle(self, **options):
        """Parse the ONIX file and create an ONIXRecord for every product.

        :raises CommandError: if a product has no ISBN or its ISBN is not
            exactly 13 characters long.
        """
        filename = options['filename']
        # Open in binary mode so the parser receives the raw bytes, and use
        # a context manager so the handle is closed once parsing is done.
        with open(filename, 'rb') as onix_file:
            tree = etree.parse(onix_file)
        for product in get_descendants(tree, 'Product'):
            isbn = get_field(product, ['ProductIdentifier', 'IDValue'])
            if isbn is None or len(isbn) != 13:
                raise CommandError(
                    'Product has no 13-character ISBN (got %r).' % (isbn,))
            # The pool is identified by the leading 8, 9 or 10 characters
            # of the ISBN.
            candidate_prefixes = [isbn[:i] for i in range(8, 11)]
            pool = ISBNPool.objects.get(prefix__in=candidate_prefixes)
            contributors = [
                self.parse_contributor(contributor)
                for contributor in get_descendants(product, 'Contributor')]
            record_data = {
                'isbn_pool': pool,
                # Digits between the pool prefix and the final (check) digit.
                'suffix': int(isbn[len(pool.prefix):-1]),
                # Several dates may be present; the first one found is used.
                'publishing_date': parse_date(
                    get_field(product, ['PublishingDate', 'Date'], allow_multiple=True)),
                'contributors': contributors,
            }
            for field, tag in DIRECT_FIELDS.items():
                record_data[field] = get_field(product, tag) or ''
            record = ONIXRecord.objects.create(**record_data)
            # The datestamp is written with a separate queryset update()
            # after creation, which bypasses the model's save().
            # NOTE(review): presumably the field is auto-populated on save
            # -- confirm against the ONIXRecord model.
            ONIXRecord.objects.filter(pk=record.pk).update(
                datestamp=parse_date(product.attrib['datestamp']))

    @staticmethod
    def parse_contributor(contributor):
        """Convert an ONIX ``Contributor`` element into a plain dict.

        The result always contains ``role``. The keys ``isni``, ``name``,
        ``corporate_name`` and ``unnamed`` are included only when the
        corresponding element has non-empty text. ``birth_date`` and
        ``death_date`` are set from ``ContributorDate`` elements with date
        role ``50`` and ``51`` respectively.
        """
        data = {
            'isni': get_field(contributor, 'IDValue'),
            'name': get_field(contributor, 'PersonNameInverted'),
            'corporate_name': get_field(contributor, 'CorporateName'),
            'unnamed': get_field(contributor, 'UnnamedPersons')
        }
        contributor_data = {
            'role': get_field(contributor, 'ContributorRole'),
        }
        for key, value in data.items():
            if value:
                contributor_data[key] = value
        # The placeholder name is not a real name: drop it and mark the
        # contributor as unnamed instead.
        if contributor_data.get('name') == UNKNOWN:
            del contributor_data['name']
            contributor_data['unnamed'] = '01'
        for date_elem in get_descendants(contributor, 'ContributorDate'):
            date_role = get_field(date_elem, 'ContributorDateRole')
            # Named ``date_value`` so the imported ``datetime.date`` is not
            # shadowed inside this method.
            date_value = get_field(date_elem, 'Date')
            if date_role == '50':
                contributor_data['birth_date'] = date_value
            elif date_role == '51':
                contributor_data['death_date'] = date_value
        return contributor_data