From: Jan Szejko Date: Tue, 13 Feb 2018 15:18:45 +0000 (+0100) Subject: onix import/export X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/3af75386ed5c78b09c4669442a2422a388e949c3 onix import/export --- diff --git a/src/isbn/__init__.py b/src/isbn/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/isbn/management/__init__.py b/src/isbn/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/isbn/management/commands/__init__.py b/src/isbn/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/isbn/management/commands/export_onix.py b/src/isbn/management/commands/export_onix.py new file mode 100644 index 000000000..19f3166f8 --- /dev/null +++ b/src/isbn/management/commands/export_onix.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +from django.core.management.base import BaseCommand +from django.utils import timezone + +from isbn.models import ONIXRecord + +HEADER = """ + +
+ + Fundacja NOWOCZESNA POLSKA + Paulina Choromańska + paulinachoromanska@nowoczesnapolska.org.pl + + %s + Opis wygenerowany przez wydawcę + pol +
""" + +PRODUCT = """ + + %(record_reference)s + 03 + 01 + Fundacja NOWOCZESNA POLSKA + + 15 + %(isbn)s + + + 00 + %(product_form)s%(product_form_detail)s + + 01 + + 01%(part_number)s + %(title)s + + %(contributors)s + %(edition_type)s + %(edition_number)s + + 01 + %(language)s + + + + + %(imprint)s + + + 01 + Fundacja NOWOCZESNA POLSKA + + Warszawa + PL + + 01 + %(publishing_date)s + + + 09 + %(publishing_date)s + + + """ + +PRODUCT_FORM_DETAIL = """ + %s""" + +PART_NUMBER = """ + %s""" + +CONTRIBUTOR = """ + + %(no)s + %(role)s%(identifier)s%(name)s%(corporate_name)s%(unnamed)s%(birth_date)s%(death_date)s + """ + +NAME_IDENTFIER = """ + + 16 + %(isni)s + """ + +NAME = """ + %s""" + +CORPORATE_NAME = """ + %s""" + +UNNAMED = """ + %s""" + +CONTRIBUTOR_DATE = """ + + %(role)s + %(date)s + """ + +FOOTER = """ +
""" + + +class Command(BaseCommand): + help = "Export ONIX." + + def handle(self, *args, **options): + xml = HEADER % timezone.now().strftime('%Y%m%dT%H%M%z') + for record in ONIXRecord.objects.all(): + xml += self.render_product(record) + xml += FOOTER + print xml.encode('utf-8') + + def render_product(self, record): + if record.product_form_detail: + product_form_detail = PRODUCT_FORM_DETAIL % record.product_form_detail + else: + product_form_detail = '' + if record.part_number: + part_number = PART_NUMBER % record.part_number + else: + part_number = '' + contributors = '' + for no, contributor in enumerate(record.contributors, start=1): + contributors += self.render_contributor(no, contributor) + return PRODUCT % { + 'datestamp': record.datestamp.strftime('%Y%m%d'), + 'record_reference': record.reference(), + 'isbn': record.isbn(), + 'product_form': record.product_form, + 'product_form_detail': product_form_detail, + 'part_number': part_number, + 'title': record.title, + 'contributors': contributors, + 'edition_type': record.edition_type, + 'edition_number': record.edition_number, + 'language': record.language, + 'imprint': record.imprint, + 'publishing_date': record.publishing_date.strftime('%Y%m%d'), + } + + @staticmethod + def render_contributor(no, contributor): + if 'isni' in contributor: + identifier = NAME_IDENTFIER % contributor + else: + identifier = '' + if 'name' in contributor: + name = NAME % contributor['name'] + else: + name = '' + if 'corporate_name' in contributor: + corporate_name = CORPORATE_NAME % contributor['corporate_name'] + else: + corporate_name = '' + if 'unnamed' in contributor: + unnamed = UNNAMED % contributor['unnamed'] + else: + unnamed = '' + if 'birth_date' in contributor: + birth_date = CONTRIBUTOR_DATE % {'role': '50', 'date': contributor['birth_date']} + else: + birth_date = '' + if 'death_date' in contributor: + death_date = CONTRIBUTOR_DATE % {'role': '51', 'date': contributor['death_date']} + else: + death_date = '' + 
# XML namespace, in Clark notation, prepended to every ONIX tag name looked up
# by the helpers below.
ONIXNS = '{http://ns.editeur.org/onix/3.0/reference}'

# Model field name -> ONIX tag whose text is copied into that field as-is.
DIRECT_FIELDS = {
    'product_form': 'ProductForm',
    'product_form_detail': 'ProductFormDetail',
    'title': 'TitleText',
    'part_number': 'PartNumber',
    'edition_type': 'EditionType',
    'edition_number': 'EditionNumber',
    'language': 'LanguageCode',
    'imprint': 'ImprintName',
}

# Person name that the importer stores as an "unnamed" contributor instead of
# a regular name.
UNNAMED = u'Autor nieznany'

try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3 has no basestring


def parse_date(date_str):
    """Convert a ``YYYYMMDD`` string into a ``datetime.date``.

    Only the first eight characters are read, so a value that carries a
    trailing time part (for example ``20180213T1518``) yields the same date
    instead of failing.

    Raises:
        ValueError: if the year, month or day part is not a valid number or
            the resulting date does not exist.
    """
    year = int(date_str[:4])
    month = int(date_str[4:6])
    day = int(date_str[6:8])
    return date(year, month, day)


def get_descendants(element, tags):
    """Return all descendants of *element* matching an ONIX tag path.

    Args:
        element: an element (or tree) offering ``findall``.
        tags: a single tag name, or a sequence of tag names describing a
            parent/child chain. Names are given without the namespace;
            ``ONIXNS`` is prepended to each of them.

    Returns:
        The list produced by ``element.findall`` for the path, searched at
        any depth below *element*.
    """
    if isinstance(tags, _STRING_TYPES):
        tags = [tags]
    path = '/'.join(ONIXNS + tag for tag in tags)
    return element.findall('.//' + path)


def get_field(element, tags, allow_multiple=False):
    """Return the text of the first element matching *tags*, or ``None``.

    Args:
        element: element to search in (see ``get_descendants``).
        tags: tag name or sequence of tag names (see ``get_descendants``).
        allow_multiple: when false (the default) more than one match is
            treated as an error; when true the first match wins.

    Raises:
        ValueError: if several elements match and *allow_multiple* is false.
            An explicit exception is used rather than ``assert`` so the check
            still runs under ``python -O``.
    """
    matches = get_descendants(element, tags)
    if len(matches) > 1 and not allow_multiple:
        # Wrap in a 1-tuple so a tuple of tags cannot break the formatting.
        raise ValueError('multiple elements: %s' % (tags,))
    return matches[0].text if matches else None
+ args = 'filename' + + def handle(self, filename, *args, **options): + tree = etree.parse(open(filename)) + for product in get_descendants(tree, 'Product'): + isbn = get_field(product, ['ProductIdentifier', 'IDValue']) + assert len(isbn) == 13 + pool = ISBNPool.objects.get(prefix__in=[isbn[:i] for i in xrange(8, 11)]) + contributors = [ + self.parse_contributor(contributor) + for contributor in get_descendants(product, 'Contributor')] + record_data = { + 'isbn_pool': pool, + 'suffix': int(isbn[len(pool.prefix):-1]), + 'publishing_date': parse_date( + get_field(product, ['PublishingDate', 'Date'], allow_multiple=True)), + 'contributors': contributors, + } + for field, tag in DIRECT_FIELDS.iteritems(): + record_data[field] = get_field(product, tag) or '' + record = ONIXRecord.objects.create(**record_data) + ONIXRecord.objects.filter(pk=record.pk).update(datestamp=parse_date(product.attrib['datestamp'])) + + @staticmethod + def parse_contributor(contributor): + data = { + 'isni': get_field(contributor, 'IDValue'), + 'name': get_field(contributor, 'PersonNameInverted'), + 'corporate_name': get_field(contributor, 'CorporateName'), + 'unnamed': get_field(contributor, 'UnnamedPersons') + } + contributor_data = { + 'role': get_field(contributor, 'ContributorRole'), + } + for key, value in data.iteritems(): + if value: + contributor_data[key] = value + if contributor_data.get('name') == UNNAMED: + del contributor_data['name'] + contributor_data['unnamed'] = '01' + for date_elem in get_descendants(contributor, 'ContributorDate'): + date_role = get_field(date_elem, 'ContributorDateRole') + date = get_field(date_elem, 'Date') + if date_role == '50': + contributor_data['birth_date'] = date + elif date_role == '51': + contributor_data['death_date'] = date + return contributor_data diff --git a/src/isbn/migrations/0001_initial.py b/src/isbn/migrations/0001_initial.py new file mode 100644 index 000000000..c4c09ad98 --- /dev/null +++ b/src/isbn/migrations/0001_initial.py @@ -0,0 
+1,44 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import jsonfield.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='ISBNPool', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('prefix', models.CharField(max_length=10)), + ('suffix_from', models.IntegerField()), + ('suffix_to', models.IntegerField()), + ('ref_from', models.IntegerField()), + ('next_suffix', models.IntegerField()), + ], + ), + migrations.CreateModel( + name='ONIXRecord', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('datestamp', models.DateField(auto_now=True)), + ('suffix', models.IntegerField()), + ('product_form', models.CharField(max_length=4)), + ('product_form_detail', models.CharField(max_length=8, blank=True)), + ('title', models.CharField(max_length=256)), + ('part_number', models.CharField(max_length=64, blank=True)), + ('contributors', jsonfield.fields.JSONField()), + ('edition_type', models.CharField(max_length=4)), + ('edition_number', models.IntegerField(default=1)), + ('language', models.CharField(max_length=4)), + ('imprint', models.CharField(max_length=256)), + ('publishing_date', models.DateField()), + ('isbn_pool', models.ForeignKey(to='isbn.ISBNPool')), + ], + ), + ] diff --git a/src/isbn/migrations/0002_auto_20180213_1617.py b/src/isbn/migrations/0002_auto_20180213_1617.py new file mode 100644 index 000000000..3cf11a4df --- /dev/null +++ b/src/isbn/migrations/0002_auto_20180213_1617.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('isbn', '0001_initial'), + ] + + operations = [ + migrations.AlterModelOptions( + name='onixrecord', + options={'ordering': 
class ISBNPool(models.Model):
    """A block of ISBNs that share one prefix."""
    # Leading digits shared by every ISBN in the pool.
    prefix = models.CharField(max_length=10)
    suffix_from = models.IntegerField()
    suffix_to = models.IntegerField()
    # Added to a record's suffix to build its record reference number.
    ref_from = models.IntegerField()
    next_suffix = models.IntegerField()

    @staticmethod
    def check_digit(prefix12):
        """Return the check digit for the digit string *prefix12*.

        Digits at even indexes weigh 1 and digits at odd indexes weigh 3;
        the result is the digit that brings the weighted sum up to a
        multiple of ten, returned as a one-character string.
        """
        weighted_sum = 0
        for index, char in enumerate(prefix12):
            weight = 3 if index % 2 else 1
            weighted_sum += weight * int(char)
        return str(-weighted_sum % 10)

    def isbn(self, suffix):
        """Build the full ISBN for the integer *suffix* within this pool.

        The suffix is zero-padded so that prefix plus suffix is twelve
        digits long, and the check digit is appended.
        """
        width = 12 - len(self.prefix)
        first_twelve = self.prefix + '%0*d' % (width, suffix)
        return first_twelve + self.check_digit(first_twelve)


class ONIXRecord(models.Model):
    """One product description, identified by a pool and a suffix in it."""
    isbn_pool = models.ForeignKey(ISBNPool)
    # auto_now: refreshed on every save().
    datestamp = models.DateField(auto_now=True)
    suffix = models.IntegerField()
    product_form = models.CharField(max_length=4)
    product_form_detail = models.CharField(max_length=8, blank=True)
    title = models.CharField(max_length=256)
    part_number = models.CharField(max_length=64, blank=True)
    # Roles and names; optionally ISNI and dates of birth/death.
    contributors = JSONField()
    edition_type = models.CharField(max_length=4)
    edition_number = models.IntegerField(default=1)
    language = models.CharField(max_length=4)
    imprint = models.CharField(max_length=256)
    publishing_date = models.DateField()

    class Meta:
        ordering = ['isbn_pool__id', 'suffix']

    def isbn(self):
        """Return this record's full ISBN, as computed by its pool."""
        pool = self.isbn_pool
        return pool.isbn(self.suffix)

    def reference(self):
        """Return the record reference: ``pl-eisbn-`` plus ref_from + suffix."""
        number = self.isbn_pool.ref_from + self.suffix
        return 'pl-eisbn-%s' % number