onix import/export
[wolnelektury.git] / src / isbn / management / commands / import_onix.py
1 # -*- coding: utf-8 -*-
2 from datetime import date
3 from lxml import etree
4 from django.core.management.base import BaseCommand
5
6 from isbn.models import ISBNPool, ONIXRecord
7
# Namespace prefix (in "{uri}" form) put in front of every ONIX tag name
# when building element lookup paths.
ONIXNS = '{http://ns.editeur.org/onix/3.0/reference}'

# Record field name -> name of the ONIX element whose text is copied
# into that field unchanged.
DIRECT_FIELDS = dict(
    product_form='ProductForm',
    product_form_detail='ProductFormDetail',
    title='TitleText',
    part_number='PartNumber',
    edition_type='EditionType',
    edition_number='EditionNumber',
    language='LanguageCode',
    imprint='ImprintName',
)

# Contributor name that is treated as "unnamed person" rather than as a
# real name (Polish for "author unknown").
UNNAMED = u'Autor nieznany'
22
23
def parse_date(date_str):
    """Turn a ``YYYYMMDD`` string into a ``datetime.date``.

    Only the first eight characters are read, so a value that carries a
    time part after the date (for example ``20200131T1200``) still yields
    its calendar date instead of failing on the day conversion.

    Raises ``ValueError`` when a component is not numeric or the
    components do not form a valid date.
    """
    year = int(date_str[:4])
    month = int(date_str[4:6])
    # Bound the slice at index 8: anything beyond it is not part of the day.
    day = int(date_str[6:8])
    return date(year, month, day)
29
30
def get_descendants(element, tags):
    """Find all descendants of `element` reached through a chain of tags.

    `tags` may be a single tag name or a sequence of tag names describing
    a parent/child path. Every name is prefixed with ONIXNS, the names are
    joined with '/', and the search starts at any depth below `element`.
    Returns whatever ``element.findall`` returns for that path.
    """
    path_parts = [tags] if isinstance(tags, basestring) else tags
    qualified = [ONIXNS + name for name in path_parts]
    return element.findall('.//' + '/'.join(qualified))
35
36
def get_field(element, tags, allow_multiple=False):
    """Return the text of the first element matching `tags`, or None.

    The lookup is delegated to get_descendants. Unless `allow_multiple`
    is true, an assertion checks that no more than one element matched.
    """
    matches = get_descendants(element, tags)
    assert allow_multiple or len(matches) <= 1, 'multiple elements: %s' % tags
    if not matches:
        return None
    return matches[0].text
42
43
class Command(BaseCommand):
    """Management command that creates ONIXRecord rows from an ONIX file."""
    help = "Import data from ONIX."
    args = 'filename'

    def handle(self, filename, *args, **options):
        """Create one ONIXRecord for each Product element in `filename`.

        For every product the ISBN is read, the ISBNPool whose prefix
        matches the start of the ISBN is looked up, and a record is
        created from the publishing date, the contributors and the
        elements listed in DIRECT_FIELDS. The record's datestamp is then
        set from the product's ``datestamp`` attribute.
        """
        # Open the file in a with-block so the handle is closed as soon as
        # parsing is finished instead of being left to the garbage collector.
        with open(filename) as onix_file:
            tree = etree.parse(onix_file)
        for product in get_descendants(tree, 'Product'):
            isbn = get_field(product, ['ProductIdentifier', 'IDValue'])
            assert len(isbn) == 13
            # A pool prefix may be 8, 9 or 10 characters long; try them all.
            pool = ISBNPool.objects.get(prefix__in=[isbn[:i] for i in xrange(8, 11)])
            contributors = [
                self.parse_contributor(contributor)
                for contributor in get_descendants(product, 'Contributor')]
            record_data = {
                'isbn_pool': pool,
                # Digits between the pool prefix and the final character
                # of the ISBN (the last character is left out).
                'suffix': int(isbn[len(pool.prefix):-1]),
                # Several dates may be present; the first one found is used.
                'publishing_date': parse_date(
                    get_field(product, ['PublishingDate', 'Date'], allow_multiple=True)),
                'contributors': contributors,
            }
            for field, tag in DIRECT_FIELDS.iteritems():
                # Missing elements are stored as empty strings, not None.
                record_data[field] = get_field(product, tag) or ''
            record = ONIXRecord.objects.create(**record_data)
            # The datestamp is written with a separate queryset update after
            # creation. NOTE(review): presumably the model sets this field
            # automatically on save -- confirm against ONIXRecord.
            ONIXRecord.objects.filter(pk=record.pk).update(datestamp=parse_date(product.attrib['datestamp']))

    @staticmethod
    def parse_contributor(contributor):
        """Build a plain dict describing one Contributor element.

        The result always has a 'role' key. 'isni', 'name',
        'corporate_name' and 'unnamed' are included only when the
        corresponding element has non-empty text. A name equal to UNNAMED
        is dropped and replaced by ``unnamed = '01'``. 'birth_date' and
        'death_date' are filled from ContributorDate elements whose role
        is '50' and '51' respectively.
        """
        data = {
            'isni': get_field(contributor, 'IDValue'),
            'name': get_field(contributor, 'PersonNameInverted'),
            'corporate_name': get_field(contributor, 'CorporateName'),
            'unnamed': get_field(contributor, 'UnnamedPersons')
        }
        contributor_data = {
            'role': get_field(contributor, 'ContributorRole'),
        }
        for key, value in data.iteritems():
            if value:
                contributor_data[key] = value
        if contributor_data.get('name') == UNNAMED:
            del contributor_data['name']
            contributor_data['unnamed'] = '01'
        for date_elem in get_descendants(contributor, 'ContributorDate'):
            date_role = get_field(date_elem, 'ContributorDateRole')
            # Named date_value so the imported datetime.date is not shadowed.
            date_value = get_field(date_elem, 'Date')
            if date_role == '50':
                contributor_data['birth_date'] = date_value
            elif date_role == '51':
                contributor_data['death_date'] = date_value
        return contributor_data