From 0c5403e0b84d85ce6f515e2b8e5bc6190647645f Mon Sep 17 00:00:00 2001
From: Radek Czajka
Date: Wed, 15 Apr 2020 13:53:07 +0200
Subject: [PATCH 1/1] Importing catalogue from pdcounter dump.

---
 .../commands/import_catalogue_from_wl_dump.py | 83 +++++++++++++++++++
 .../migrations/0010_auto_20200415_1336.py     | 33 ++++++++
 src/catalogue/models.py                       |  4 +
 3 files changed, 120 insertions(+)
 create mode 100644 src/catalogue/management/commands/import_catalogue_from_wl_dump.py
 create mode 100644 src/catalogue/migrations/0010_auto_20200415_1336.py

diff --git a/src/catalogue/management/commands/import_catalogue_from_wl_dump.py b/src/catalogue/management/commands/import_catalogue_from_wl_dump.py
new file mode 100644
index 00000000..226fc476
--- /dev/null
+++ b/src/catalogue/management/commands/import_catalogue_from_wl_dump.py
@@ -0,0 +1,83 @@
+import json
+import sys
+from django.core.management import BaseCommand, CommandError
+from slugify import slugify
+import wikidata
+from catalogue.models import Book, Author
+
+
+def _split_name(full_name):
+    """Split a full name on its last space into (first_name, last_name).
+
+    A name without a space becomes the first name, with an empty last name.
+    """
+    name_pieces = full_name.rsplit(' ', 1)
+    if len(name_pieces) == 1:
+        # Unpack the single piece; the bare list must not become the name.
+        return name_pieces[0], ''
+    first_name, last_name = name_pieces
+    return first_name, last_name
+
+
+class Command(BaseCommand):
+    """Import book stubs and authors from a JSON dump of pdcounter models."""
+
+    def add_arguments(self, parser):
+        parser.add_argument('path')
+
+    def handle(self, path, **kwargs):
+        """Load the JSON dump at `path` and import each item in it.
+
+        Fields that already hold a value are left as they are.  Processing
+        stops at the first item whose model is not recognized.
+        """
+        # Be explicit about the encoding instead of relying on the locale.
+        with open(path, encoding='utf-8') as f:
+            data = json.load(f)
+        for item in data:
+            if item['model'] == 'pdcounter.bookstub':
+                self._import_book_stub(item['fields'])
+            elif item['model'] == 'pdcounter.author':
+                self._import_author(item['fields'])
+            else:
+                print(item)
+                break
+
+    def _import_book_stub(self, fields):
+        """Create or complete the Book for one pdcounter.bookstub item."""
+        book, created = Book.objects.get_or_create(slug=fields['slug'])
+        book.title = book.title or fields['title']
+        book.pd_year = book.pd_year or fields['pd']
+        if fields['translator'] and not book.translators.exists():
+            note = 'tłum.: ' + fields['translator']
+            if note not in book.notes:
+                # Drop empty parts so no trailing newline is stored.
+                book.notes = '\n'.join(filter(None, [note, book.notes]))
+        book.save()
+
+        if not book.authors.exists():
+            author_name = fields['author']
+            first_name, last_name = _split_name(author_name)
+            author, created = Author.objects.get_or_create(
+                first_name=first_name, last_name=last_name)
+            if not author.slug:
+                print(author.slug, author_name)
+                author.slug = slugify(author_name)
+                author.save()
+            book.authors.set([author])
+
+    def _import_author(self, fields):
+        """Create or complete the Author for one pdcounter.author item."""
+        # There is no handling for wiki links here, so refuse to go on
+        # rather than drop one.  Raised explicitly: asserts vanish under -O.
+        if fields['wiki_link']:
+            raise CommandError(
+                'Unexpected wiki_link for author %s' % fields['slug'])
+        author, created = Author.objects.get_or_create(slug=fields['slug'])
+        if not author.first_name and not author.last_name:
+            author.first_name, author.last_name = _split_name(fields['name'])
+        author.year_of_death = author.year_of_death or fields['death']
+        # NOTE(review): description lands in notes, not Author.description.
+        author.notes = author.notes or fields['description']
+        author.gazeta_link = author.gazeta_link or fields['gazeta_link']
+        author.save()
diff --git a/src/catalogue/migrations/0010_auto_20200415_1336.py b/src/catalogue/migrations/0010_auto_20200415_1336.py
new file mode 100644
index 00000000..dcb8fd6d
--- /dev/null
+++ b/src/catalogue/migrations/0010_auto_20200415_1336.py
@@ -0,0 +1,33 @@
+# Generated by Django 3.0.4 on 2020-04-15 13:36
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalogue', '0009_auto_20200411_1114'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='author',
+            name='culturepl_link',
+            field=models.CharField(blank=True, max_length=255),
+        ),
+        migrations.AddField(
+            model_name='author',
+            name='description',
+            field=models.TextField(blank=True),
+        ),
+        migrations.AddField(
+            model_name='author',
+            name='gazeta_link',
+            field=models.CharField(blank=True, max_length=255),
+        ),
+        migrations.AddField(
+            model_name='book',
+            name='pd_year',
+            field=models.IntegerField(blank=True, null=True),
+        ),
+    ]
diff --git a/src/catalogue/models.py b/src/catalogue/models.py
index c2ed3812..530483c8 100644
--- a/src/catalogue/models.py
+++ b/src/catalogue/models.py
@@ -20,6 +20,9 @@ class Author(WikidataMixin, models.Model):
         ],
     )
     notes = models.TextField(blank=True)
+    gazeta_link = models.CharField(max_length=255, blank=True)
+    culturepl_link = models.CharField(max_length=255, blank=True)
+    description = models.TextField(blank=True)
     priority = models.PositiveSmallIntegerField(
         default=0, choices=[(0, _("Low")), (1, _("Medium")), (2, _("High"))]
     )
@@ -54,6 +57,7 @@ class Book(WikidataMixin, models.Model):
     priority = models.PositiveSmallIntegerField(
         default=0, choices=[(0, _("Low")), (1, _("Medium")), (2, _("High"))]
     )
+    pd_year = models.IntegerField(null=True, blank=True)
 
     class Wikidata:
         authors = WIKIDATA.AUTHOR
-- 
2.20.1