onix import/export
author Jan Szejko <janek37@gmail.com>
Tue, 13 Feb 2018 15:18:45 +0000 (16:18 +0100)
committer Jan Szejko <janek37@gmail.com>
Tue, 13 Feb 2018 15:18:45 +0000 (16:18 +0100)
src/isbn/__init__.py [new file with mode: 0644]
src/isbn/management/__init__.py [new file with mode: 0644]
src/isbn/management/commands/__init__.py [new file with mode: 0644]
src/isbn/management/commands/export_onix.py [new file with mode: 0644]
src/isbn/management/commands/import_onix.py [new file with mode: 0644]
src/isbn/migrations/0001_initial.py [new file with mode: 0644]
src/isbn/migrations/0002_auto_20180213_1617.py [new file with mode: 0644]
src/isbn/migrations/__init__.py [new file with mode: 0644]
src/isbn/models.py [new file with mode: 0644]
src/wolnelektury/settings/__init__.py

diff --git a/src/isbn/__init__.py b/src/isbn/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/isbn/management/__init__.py b/src/isbn/management/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/isbn/management/commands/__init__.py b/src/isbn/management/commands/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/isbn/management/commands/export_onix.py b/src/isbn/management/commands/export_onix.py
new file mode 100644 (file)
index 0000000..19f3166
--- /dev/null
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+from django.core.management.base import BaseCommand
+from django.utils import timezone
+
+from isbn.models import ONIXRecord
+
+# Opening part of the exported document: the XML declaration, the
+# <ONIXMessage> root element and the complete <Header> block.  The single
+# %s placeholder is filled with the SentDateTime value.
+# NOTE(review): the schema URL is declared as a namespace prefix
+# (xmlns:schemaLocation) rather than as an xsi:schemaLocation attribute;
+# presumably this mirrors what the receiving service expects - confirm.
+HEADER = """<?xml version="1.0" encoding="UTF-8"?>
+<ONIXMessage release="3.0"
+    xmlns="http://ns.editeur.org/onix/3.0/reference"
+    xmlns:schemaLocation="https://e-isbn.pl/IsbnWeb/schemas/ONIX_BookProduct_3.0_reference.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <Header>
+        <Sender>
+            <SenderName>Fundacja NOWOCZESNA POLSKA</SenderName>
+            <ContactName>Paulina Choromańska</ContactName>
+            <EmailAddress>paulinachoromanska@nowoczesnapolska.org.pl</EmailAddress>
+        </Sender>
+        <SentDateTime>%s</SentDateTime>
+        <MessageNote>Opis wygenerowany przez wydawcę</MessageNote>
+        <DefaultLanguageOfText>pol</DefaultLanguageOfText>
+    </Header>"""
+
+# Template of one <Product> element, filled by %-formatting with a mapping.
+# Expected keys: datestamp, record_reference, isbn, product_form,
+# product_form_detail, part_number, title, contributors, edition_type,
+# edition_number, language, imprint and publishing_date.
+# product_form_detail, part_number and contributors are inserted directly
+# after a closing tag, so they must be either an empty string or an already
+# rendered XML fragment that starts with its own newline.
+# publishing_date is used twice, for PublishingDateRole 01 and 09.
+PRODUCT = """
+    <Product datestamp="%(datestamp)s">
+        <RecordReference>%(record_reference)s</RecordReference>
+        <NotificationType>03</NotificationType>
+        <RecordSourceType>01</RecordSourceType>
+        <RecordSourceName>Fundacja NOWOCZESNA POLSKA</RecordSourceName>
+        <ProductIdentifier>
+            <ProductIDType>15</ProductIDType>
+            <IDValue>%(isbn)s</IDValue>
+        </ProductIdentifier>
+        <DescriptiveDetail>
+            <ProductComposition>00</ProductComposition>
+            <ProductForm>%(product_form)s</ProductForm>%(product_form_detail)s
+            <TitleDetail>
+                <TitleType>01</TitleType>
+                <TitleElement>
+                    <TitleElementLevel>01</TitleElementLevel>%(part_number)s
+                    <TitleText>%(title)s</TitleText>
+                </TitleElement>
+            </TitleDetail>%(contributors)s
+            <EditionType>%(edition_type)s</EditionType>
+            <EditionNumber>%(edition_number)s</EditionNumber>
+            <Language>
+                <LanguageRole>01</LanguageRole>
+                <LanguageCode>%(language)s</LanguageCode>
+            </Language>
+        </DescriptiveDetail>
+        <PublishingDetail>
+            <Imprint>
+                <ImprintName>%(imprint)s</ImprintName>
+            </Imprint>
+            <Publisher>
+                <PublishingRole>01</PublishingRole>
+                <PublisherName>Fundacja NOWOCZESNA POLSKA</PublisherName>
+            </Publisher>
+            <CityOfPublication>Warszawa</CityOfPublication>
+            <CountryOfPublication>PL</CountryOfPublication>
+            <PublishingDate>
+                <PublishingDateRole>01</PublishingDateRole>
+                <Date>%(publishing_date)s</Date>
+            </PublishingDate>
+            <PublishingDate>
+                <PublishingDateRole>09</PublishingDateRole>
+                <Date>%(publishing_date)s</Date>
+            </PublishingDate>
+        </PublishingDetail>
+    </Product>"""
+
+# Optional fragments.  Each one begins with a newline so that it can be
+# appended directly after the closing tag that precedes its placeholder.
+
+# Optional <ProductFormDetail> line; %s is the detail code.
+PRODUCT_FORM_DETAIL = """
+            <ProductFormDetail>%s</ProductFormDetail>"""
+
+# Optional <PartNumber> line of the title element; %s is the part number.
+PART_NUMBER = """
+                    <PartNumber>%s</PartNumber>"""
+
+# One <Contributor> element.  "no" and "role" are plain values; identifier,
+# name, corporate_name, unnamed, birth_date and death_date are each either an
+# empty string or one of the rendered fragments defined below.
+CONTRIBUTOR = """
+            <Contributor>
+                <SequenceNumber>%(no)s</SequenceNumber>
+                <ContributorRole>%(role)s</ContributorRole>%(identifier)s%(name)s%(corporate_name)s%(unnamed)s%(birth_date)s%(death_date)s
+            </Contributor>"""
+
+# <NameIdentifier> fragment with NameIDType 16; expects an "isni" key.
+# NOTE(review): the constant name is misspelled (IDENTFIER); it is kept
+# because the export command refers to it by this exact name.
+NAME_IDENTFIER = """
+                <NameIdentifier>
+                    <NameIDType>16</NameIDType>
+                    <IDValue>%(isni)s</IDValue>
+                </NameIdentifier>"""
+
+# <PersonNameInverted> fragment; %s is the contributor's "name" value.
+NAME = """
+                <PersonNameInverted>%s</PersonNameInverted>"""
+
+# <CorporateName> fragment; %s is the contributor's "corporate_name" value.
+CORPORATE_NAME = """
+                <CorporateName>%s</CorporateName>"""
+
+# <UnnamedPersons> fragment; %s is the contributor's "unnamed" value.
+UNNAMED = """
+                <UnnamedPersons>%s</UnnamedPersons>"""
+
+# <ContributorDate> fragment; expects "role" (a ContributorDateRole code)
+# and "date".
+CONTRIBUTOR_DATE = """
+                <ContributorDate>
+                    <ContributorDateRole>%(role)s</ContributorDateRole>
+                    <Date>%(date)s</Date>
+                </ContributorDate>"""
+
+# Closing tag of the root element, appended after the last product.
+FOOTER = """
+</ONIXMessage>"""
+
+
+class Command(BaseCommand):
+    """Management command that prints all ONIX records as one ONIX message.
+
+    The message is assembled from the module-level string templates and
+    written to standard output as UTF-8 encoded bytes.
+    """
+    help = "Export ONIX."
+
+    def handle(self, *args, **options):
+        """Render the header, one <Product> per ONIXRecord and the footer."""
+        parts = [HEADER % timezone.now().strftime('%Y%m%dT%H%M%z')]
+        # Collect the fragments and join them once instead of growing a
+        # string with += for every record.
+        parts.extend(self.render_product(record) for record in ONIXRecord.objects.all())
+        parts.append(FOOTER)
+        print ''.join(parts).encode('utf-8')
+
+    @staticmethod
+    def _xml_text(value):
+        """Return ``value`` as text that is safe inside an XML element.
+
+        The value is converted to text with %-formatting (so integers are
+        accepted) and the characters ``&``, ``<`` and ``>`` are replaced by
+        their entities.  Without this a title such as "Tom & Jerry" would
+        make the whole exported message malformed.
+        """
+        from xml.sax.saxutils import escape
+        return escape('%s' % value)
+
+    def render_product(self, record):
+        """Return the <Product> element for ``record``.
+
+        ``record`` is an ONIXRecord; every value taken from it is
+        XML-escaped before being placed into the PRODUCT template.
+        """
+        esc = self._xml_text
+        # Optional fragments are rendered only for non-empty values.
+        if record.product_form_detail:
+            product_form_detail = PRODUCT_FORM_DETAIL % esc(record.product_form_detail)
+        else:
+            product_form_detail = ''
+        if record.part_number:
+            part_number = PART_NUMBER % esc(record.part_number)
+        else:
+            part_number = ''
+        # Sequence numbers of contributors start at 1.
+        contributors = ''.join(
+            self.render_contributor(no, contributor)
+            for no, contributor in enumerate(record.contributors, start=1))
+        return PRODUCT % {
+            'datestamp': record.datestamp.strftime('%Y%m%d'),
+            'record_reference': esc(record.reference()),
+            'isbn': esc(record.isbn()),
+            'product_form': esc(record.product_form),
+            'product_form_detail': product_form_detail,
+            'part_number': part_number,
+            'title': esc(record.title),
+            'contributors': contributors,
+            'edition_type': esc(record.edition_type),
+            'edition_number': esc(record.edition_number),
+            'language': esc(record.language),
+            'imprint': esc(record.imprint),
+            'publishing_date': record.publishing_date.strftime('%Y%m%d'),
+        }
+
+    @staticmethod
+    def render_contributor(no, contributor):
+        """Return the <Contributor> element for one contributor mapping.
+
+        ``no`` is the sequence number to print.  ``contributor`` must have a
+        "role" key (KeyError otherwise) and may have "isni", "name",
+        "corporate_name", "unnamed", "birth_date" and "death_date".  A
+        fragment is emitted whenever its key is present, regardless of the
+        value; all values are XML-escaped.
+        """
+        esc = Command._xml_text
+
+        def optional(template, key):
+            # Single-placeholder fragment, or nothing when the key is absent.
+            if key not in contributor:
+                return ''
+            return template % esc(contributor[key])
+
+        def contributor_date(role, key):
+            # <ContributorDate> fragment with the given date role code.
+            if key not in contributor:
+                return ''
+            return CONTRIBUTOR_DATE % {'role': role, 'date': esc(contributor[key])}
+
+        if 'isni' in contributor:
+            identifier = NAME_IDENTFIER % {'isni': esc(contributor['isni'])}
+        else:
+            identifier = ''
+        return CONTRIBUTOR % {
+            'no': no,
+            'role': esc(contributor['role']),
+            'identifier': identifier,
+            'name': optional(NAME, 'name'),
+            'corporate_name': optional(CORPORATE_NAME, 'corporate_name'),
+            'unnamed': optional(UNNAMED, 'unnamed'),
+            'birth_date': contributor_date('50', 'birth_date'),
+            'death_date': contributor_date('51', 'death_date'),
+        }
diff --git a/src/isbn/management/commands/import_onix.py b/src/isbn/management/commands/import_onix.py
new file mode 100644 (file)
index 0000000..1d86e96
--- /dev/null
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+from datetime import date
+from lxml import etree
+from django.core.management.base import BaseCommand
+
+from isbn.models import ISBNPool, ONIXRecord
+
+# Namespace of the ONIX 3.0 reference tags, in the "{uri}" form that is
+# prepended to every tag name when element paths are built.
+ONIXNS = '{http://ns.editeur.org/onix/3.0/reference}'
+
+# Maps ONIXRecord field names to the ONIX tag whose text is copied into the
+# field unchanged (an empty string is stored when the tag is missing).
+DIRECT_FIELDS = {
+    'product_form': 'ProductForm',
+    'product_form_detail': 'ProductFormDetail',
+    'title': 'TitleText',
+    'part_number': 'PartNumber',
+    'edition_type': 'EditionType',
+    'edition_number': 'EditionNumber',
+    'language': 'LanguageCode',
+    'imprint': 'ImprintName',
+}
+
+# Person name that the importer converts into an "unnamed" contributor
+# entry instead of storing it as a name.
+UNNAMED = u'Autor nieznany'
+
+
+def parse_date(date_str):
+    """Convert a ``YYYYMMDD`` string into a ``datetime.date``.
+
+    The first four characters are the year, the next two the month and
+    everything after that the day.  Raises ValueError when a part is not
+    an integer or the resulting date does not exist.
+    """
+    pieces = (date_str[:4], date_str[4:6], date_str[6:])
+    return date(*[int(piece) for piece in pieces])
+
+
+def get_descendants(element, tags):
+    """Return all descendants of ``element`` matching a path of ONIX tags.
+
+    ``tags`` is either a single tag name or a sequence of tag names forming
+    a parent/child chain; every name is qualified with ONIXNS and the chain
+    may start at any depth below ``element``.
+    """
+    chain = [tags] if isinstance(tags, basestring) else tags
+    qualified = [ONIXNS + tag for tag in chain]
+    return element.findall('.//' + '/'.join(qualified))
+
+
+def get_field(element, tags, allow_multiple=False):
+    """Return the text of the first descendant matching ``tags``, or None.
+
+    ``tags`` is interpreted as in get_descendants().  Unless
+    ``allow_multiple`` is true, finding more than one matching element
+    fails an assertion.
+    """
+    matches = get_descendants(element, tags)
+    if not allow_multiple:
+        assert len(matches) <= 1, 'multiple elements: %s' % tags
+    if not matches:
+        return None
+    return matches[0].text
+
+
+class Command(BaseCommand):
+    """Management command creating ONIXRecord rows from an ONIX XML file."""
+    help = "Import data from ONIX."
+    args = 'filename'
+
+    def handle(self, filename, *args, **options):
+        """Create one ONIXRecord for every <Product> found in ``filename``.
+
+        Raises CommandError when a product has no 13-character ISBN.
+        ISBNPool.DoesNotExist / MultipleObjectsReturned propagate when the
+        ISBN does not match exactly one pool prefix.
+        """
+        from django.core.management.base import CommandError
+
+        # Parse inside a with-block so the file handle is closed right away
+        # instead of being left to the garbage collector.
+        with open(filename) as onix_file:
+            tree = etree.parse(onix_file)
+        for product in get_descendants(tree, 'Product'):
+            isbn = get_field(product, ['ProductIdentifier', 'IDValue'])
+            # Explicit check instead of assert: asserts vanish under -O and
+            # a missing value would otherwise surface as a TypeError.
+            if isbn is None or len(isbn) != 13:
+                raise CommandError('expected a 13-character ISBN, got %r' % (isbn,))
+            # Pool prefixes tried here are the first 8, 9 or 10 characters.
+            pool = ISBNPool.objects.get(prefix__in=[isbn[:i] for i in xrange(8, 11)])
+            contributors = [
+                self.parse_contributor(contributor)
+                for contributor in get_descendants(product, 'Contributor')]
+            record_data = {
+                'isbn_pool': pool,
+                # Digits between the pool prefix and the final check digit.
+                'suffix': int(isbn[len(pool.prefix):-1]),
+                # Several <PublishingDate> elements may exist; the first
+                # one found is used.
+                'publishing_date': parse_date(
+                    get_field(product, ['PublishingDate', 'Date'], allow_multiple=True)),
+                'contributors': contributors,
+            }
+            for field, tag in DIRECT_FIELDS.iteritems():
+                record_data[field] = get_field(product, tag) or ''
+            record = ONIXRecord.objects.create(**record_data)
+            # datestamp is auto_now, so create() stores today's date; a
+            # queryset update() is used to store the value from the file.
+            ONIXRecord.objects.filter(pk=record.pk).update(datestamp=parse_date(product.attrib['datestamp']))
+
+    @staticmethod
+    def parse_contributor(contributor):
+        """Return a dict describing one <Contributor> element.
+
+        The dict always has "role" and additionally "isni", "name",
+        "corporate_name" and "unnamed" when the matching element has
+        non-empty text, plus "birth_date" / "death_date" taken from
+        <ContributorDate> elements with role 50 / 51.  A person name equal
+        to UNNAMED is replaced by ``unnamed = '01'``.
+        """
+        data = {
+            'isni': get_field(contributor, 'IDValue'),
+            'name': get_field(contributor, 'PersonNameInverted'),
+            'corporate_name': get_field(contributor, 'CorporateName'),
+            'unnamed': get_field(contributor, 'UnnamedPersons')
+        }
+        contributor_data = {
+            'role': get_field(contributor, 'ContributorRole'),
+        }
+        # Only keep the optional values that are actually present.
+        for key, value in data.iteritems():
+            if value:
+                contributor_data[key] = value
+        if contributor_data.get('name') == UNNAMED:
+            del contributor_data['name']
+            contributor_data['unnamed'] = '01'
+        for date_elem in get_descendants(contributor, 'ContributorDate'):
+            date_role = get_field(date_elem, 'ContributorDateRole')
+            # Named date_value so the module-level ``date`` import is not
+            # shadowed inside this function.
+            date_value = get_field(date_elem, 'Date')
+            if date_role == '50':
+                contributor_data['birth_date'] = date_value
+            elif date_role == '51':
+                contributor_data['death_date'] = date_value
+        return contributor_data
diff --git a/src/isbn/migrations/0001_initial.py b/src/isbn/migrations/0001_initial.py
new file mode 100644 (file)
index 0000000..c4c09ad
--- /dev/null
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import jsonfield.fields
+
+
+# Initial migration of the isbn app: creates the ISBNPool and ONIXRecord
+# tables.  The operations must stay in sync with the recorded migration
+# state, so only comments are added here.
+class Migration(migrations.Migration):
+
+    # First migration of the app - nothing to depend on.
+    dependencies = [
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='ISBNPool',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                ('prefix', models.CharField(max_length=10)),
+                ('suffix_from', models.IntegerField()),
+                ('suffix_to', models.IntegerField()),
+                ('ref_from', models.IntegerField()),
+                ('next_suffix', models.IntegerField()),
+            ],
+        ),
+        migrations.CreateModel(
+            name='ONIXRecord',
+            fields=[
+                ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)),
+                # auto_now: the field is set to the current date on every save().
+                ('datestamp', models.DateField(auto_now=True)),
+                ('suffix', models.IntegerField()),
+                ('product_form', models.CharField(max_length=4)),
+                ('product_form_detail', models.CharField(max_length=8, blank=True)),
+                ('title', models.CharField(max_length=256)),
+                ('part_number', models.CharField(max_length=64, blank=True)),
+                ('contributors', jsonfield.fields.JSONField()),
+                ('edition_type', models.CharField(max_length=4)),
+                ('edition_number', models.IntegerField(default=1)),
+                ('language', models.CharField(max_length=4)),
+                ('imprint', models.CharField(max_length=256)),
+                ('publishing_date', models.DateField()),
+                ('isbn_pool', models.ForeignKey(to='isbn.ISBNPool')),
+            ],
+        ),
+    ]
diff --git a/src/isbn/migrations/0002_auto_20180213_1617.py b/src/isbn/migrations/0002_auto_20180213_1617.py
new file mode 100644 (file)
index 0000000..3cf11a4
--- /dev/null
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+# Second migration of the isbn app: records the default ordering of
+# ONIXRecord (by pool id, then by suffix) in the migration state.
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('isbn', '0001_initial'),
+    ]
+
+    operations = [
+        migrations.AlterModelOptions(
+            name='onixrecord',
+            options={'ordering': ['isbn_pool__id', 'suffix']},
+        ),
+    ]
diff --git a/src/isbn/migrations/__init__.py b/src/isbn/migrations/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/src/isbn/models.py b/src/isbn/models.py
new file mode 100644 (file)
index 0000000..746281f
--- /dev/null
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from django.db import models
+from jsonfield import JSONField
+
+
+class ISBNPool(models.Model):
+    """A block of ISBN numbers that share a common prefix.
+
+    A full number is made of the prefix, a zero-padded suffix that fills
+    the number up to 12 digits, and a trailing check digit.
+    """
+    prefix = models.CharField(max_length=10)
+    suffix_from = models.IntegerField()
+    suffix_to = models.IntegerField()
+    ref_from = models.IntegerField()
+    next_suffix = models.IntegerField()
+
+    @staticmethod
+    def check_digit(prefix12):
+        """Return the check digit for a string of digits, as a string.
+
+        Digits at even indexes are weighted 1 and digits at odd indexes 3;
+        the result is the digit that brings the weighted sum up to a
+        multiple of ten.  Raises ValueError for non-digit characters.
+        """
+        weighted_sum = 0
+        for index, char in enumerate(prefix12):
+            weight = 3 if index % 2 else 1
+            weighted_sum += weight * int(char)
+        return str((10 - weighted_sum % 10) % 10)
+
+    def isbn(self, suffix):
+        """Return the complete 13-character number for integer ``suffix``."""
+        # The suffix is zero-padded to the digits left after the prefix.
+        suffix_width = 12 - len(self.prefix)
+        first_twelve = self.prefix + '%0*d' % (suffix_width, suffix)
+        return first_twelve + self.check_digit(first_twelve)
+
+
+class ONIXRecord(models.Model):
+    """Data of one publication, identified by a suffix within an ISBNPool."""
+    isbn_pool = models.ForeignKey(ISBNPool)
+    # auto_now: set to the current date on every save().
+    datestamp = models.DateField(auto_now=True)
+    suffix = models.IntegerField()
+    product_form = models.CharField(max_length=4)
+    product_form_detail = models.CharField(max_length=8, blank=True)
+    title = models.CharField(max_length=256)
+    part_number = models.CharField(max_length=64, blank=True)
+    # List of contributors: roles, names, optionally ISNI and dates of
+    # birth/death.
+    contributors = JSONField()
+    edition_type = models.CharField(max_length=4)
+    edition_number = models.IntegerField(default=1)
+    language = models.CharField(max_length=4)
+    imprint = models.CharField(max_length=256)
+    publishing_date = models.DateField()
+
+    class Meta:
+        ordering = ['isbn_pool__id', 'suffix']
+
+    def isbn(self):
+        """Return the full ISBN built by this record's pool for its suffix."""
+        pool = self.isbn_pool
+        return pool.isbn(self.suffix)
+
+    def reference(self):
+        """Return the record reference: 'pl-eisbn-' plus ref_from + suffix."""
+        number = self.isbn_pool.ref_from + self.suffix
+        return 'pl-eisbn-%s' % number
index ac36651..72035d9 100644 (file)
@@ -59,6 +59,7 @@ INSTALLED_APPS_OUR = [
     'libraries',
     'newsletter',
     'contact',
+    'isbn',
 ]
 
 GETPAID_BACKENDS = (