Importing catalogue from WL dump.
author Radek Czajka <rczajka@rczajka.pl>
Fri, 17 Apr 2020 16:53:07 +0000 (18:53 +0200)
committer Radek Czajka <rczajka@rczajka.pl>
Fri, 17 Apr 2020 16:53:07 +0000 (18:53 +0200)
src/catalogue/admin.py
src/catalogue/management/commands/import_catalogue_from_wl_dump.py
src/catalogue/migrations/0014_book_gazeta_link.py [new file with mode: 0644]
src/catalogue/migrations/0015_auto_20200416_1120.py [new file with mode: 0644]
src/catalogue/migrations/0016_auto_20200417_1421.py [new file with mode: 0644]
src/catalogue/migrations/0017_auto_20200417_1638.py [new file with mode: 0644]
src/catalogue/models.py
src/catalogue/wikidata.py

index 95da499..91656f1 100644 (file)
@@ -4,60 +4,95 @@ from .wikidata import WikidataAdminMixin
 
 
 class AuthorAdmin(WikidataAdminMixin, admin.ModelAdmin):
 
 
 class AuthorAdmin(WikidataAdminMixin, admin.ModelAdmin):
-    list_display = ["first_name", "last_name", 'status', "year_of_death", "priority", "wikidata_link"]
-    list_filter = ['year_of_death', 'priority', 'collections', 'status']
+    list_display = [
+        "first_name",
+        "last_name",
+        "status",
+        "year_of_death",
+        "priority",
+        "wikidata_link",
+        "slug",
+    ]
+    list_filter = ["year_of_death", "priority", "collections", "status"]
     search_fields = ["first_name", "last_name", "wikidata"]
     prepopulated_fields = {"slug": ("first_name", "last_name")}
     search_fields = ["first_name", "last_name", "wikidata"]
     prepopulated_fields = {"slug": ("first_name", "last_name")}
-    autocomplete_fields = ['collections']
+    autocomplete_fields = ["collections"]
 
 
 admin.site.register(models.Author, AuthorAdmin)
 
 
 class BookAdmin(WikidataAdminMixin, admin.ModelAdmin):
 
 
 admin.site.register(models.Author, AuthorAdmin)
 
 
 class BookAdmin(WikidataAdminMixin, admin.ModelAdmin):
-    list_display = ["title", 'authors_str', 'translators_str', 'language', 'pd_year', 'priority', 'wikidata_link']
-    search_fields = ["title", 'wikidata']
-    autocomplete_fields = ["authors", "translators", "based_on", 'collections']
+    list_display = [
+        "title",
+        "authors_str",
+        "translators_str",
+        "language",
+        "pd_year",
+        "priority",
+        "wikidata_link",
+    ]
+    search_fields = ["title", "wikidata"]
+    autocomplete_fields = ["authors", "translators", "based_on", "collections"]
     prepopulated_fields = {"slug": ("title",)}
     prepopulated_fields = {"slug": ("title",)}
-    list_filter = ['language', 'pd_year', 'collections']
-    readonly_fields = ['wikidata_link']
+    list_filter = ["language", "pd_year", "collections"]
+    readonly_fields = ["wikidata_link"]
     fieldsets = [
     fieldsets = [
-        (None, {'fields': [
-            ('wikidata', 'wikidata_link'),
-        ]}),
-        ('Identification', {'fields': [
-            'title', 'slug', 'authors', 'translators', 'language', 
-            'based_on',
-            'pd_year',
-        ]}),
-        ('Plan', {'fields': [
-            'scans_source',
-            'text_source',
-            'priority',
-            'collections',
-            'notes',
-        ]}),
+        (None, {"fields": [("wikidata", "wikidata_link")]}),
+        (
+            "Identification",
+            {
+                "fields": [
+                    "title",
+                    "slug",
+                    "authors",
+                    "translators",
+                    "language",
+                    "based_on",
+                    "pd_year",
+                ]
+            },
+        ),
+        (
+            "Plan",
+            {
+                "fields": [
+                    "scans_source",
+                    "text_source",
+                    "priority",
+                    "collections",
+                    "notes",
+                ]
+            },
+        ),
     ]
 
     ]
 
+    def get_queryset(self, request):
+        qs = super().get_queryset(request)
+        if request.resolver_match.view_name.endswith("changelist"):
+            qs = qs.prefetch_related("authors", "translators")
+        return qs
+
 
 admin.site.register(models.Book, BookAdmin)
 
 
 class AuthorInline(admin.TabularInline):
     model = models.Author.collections.through
 
 admin.site.register(models.Book, BookAdmin)
 
 
 class AuthorInline(admin.TabularInline):
     model = models.Author.collections.through
-    autocomplete_fields = ['author']
+    autocomplete_fields = ["author"]
 
 
 class BookInline(admin.TabularInline):
     model = models.Book.collections.through
 
 
 class BookInline(admin.TabularInline):
     model = models.Book.collections.through
-    autocomplete_fields = ['book']
+    autocomplete_fields = ["book"]
 
 
 class CollectionAdmin(admin.ModelAdmin):
 
 
 class CollectionAdmin(admin.ModelAdmin):
-    list_display = ['name']
+    list_display = ["name"]
     autocomplete_fields = []
     autocomplete_fields = []
-    prepopulated_fields = {'slug': ('name',)}
-    search_fields = ['name']
+    prepopulated_fields = {"slug": ("name",)}
+    search_fields = ["name"]
     inlines = [AuthorInline, BookInline]
 
     inlines = [AuthorInline, BookInline]
 
+
 admin.site.register(models.Collection, CollectionAdmin)
 admin.site.register(models.Collection, CollectionAdmin)
index ea3f805..2ce39f0 100644 (file)
@@ -1,4 +1,5 @@
 import json
 import json
+from urllib.request import urlopen
 import sys
 from django.core.management import BaseCommand
 from slugify import slugify
 import sys
 from django.core.management import BaseCommand
 from slugify import slugify
@@ -7,54 +8,177 @@ from catalogue.models import Book, Author
 
 
 def parse_name(name):
 
 
 def parse_name(name):
-    name_pieces = name.rsplit(' ', 1)
+    name_pieces = name.rsplit(" ", 1)
     if len(name_pieces) == 1:
     if len(name_pieces) == 1:
-        return name_pieces[0], ''
+        return name_pieces[0], ""
     else:
         return name_pieces
 
 
     else:
         return name_pieces
 
 
+def find_wikidata(link, lang):
+    """Look up the Wikidata entity ID for a wiki article link.
+
+    The page title is taken from the last path segment of ``link`` (with
+    any ``#fragment`` removed and spaces replaced by underscores) and is
+    queried against the ``{lang}wiki`` site through the Wikidata
+    ``wbgetentities`` API.
+
+    Returns the first entity key of the response when it starts with "Q",
+    otherwise ``None``.
+    """
+    # Drop the fragment first and only then take the last path segment.
+    # Previously the fragment was split off ``link`` and the result
+    # overwrote the extracted title, so the whole URL was sent as the title.
+    link = link.rstrip().split("#", 1)[0]
+    title = link.rsplit("/", 1)[-1]
+    title = title.replace(" ", "_")
+    url = (
+        "https://www.wikidata.org/w/api.php"
+        f"?action=wbgetentities&sites={lang}wiki&titles={title}&format=json"
+    )
+    # Use the response as a context manager so the connection is closed
+    # as soon as the JSON payload has been read.
+    with urlopen(url) as response:
+        data = json.load(response)
+    wikidata_id = list(data["entities"].keys())[0]
+    if not wikidata_id.startswith("Q"):
+        return None
+    return wikidata_id
+
 
 class Command(BaseCommand):
     def add_arguments(self, parser):
 
 class Command(BaseCommand):
     def add_arguments(self, parser):
-        parser.add_argument('path')
+        parser.add_argument("path")
 
     def handle(self, path, **kwargs):
         with open(path) as f:
             data = json.load(f)
 
     def handle(self, path, **kwargs):
         with open(path) as f:
             data = json.load(f)
-        for item in data:
-            if item['model'] == 'pdcounter.bookstub':
-                notes = []
-                slug = item['fields']['slug']
-                book, created = Book.objects.get_or_create(slug=slug)
-                if item['fields']['translator'] and not book.translators.exists():
-                    notes.append('tłum.: ' + item['fields']['translator'])
-                book.title = book.title or item['fields']['title']
-                book.pd_year = book.pd_year or item['fields']['pd']
-                notes = '\n'.join(notes)
-                if notes and notes not in book.notes:
-                    book.notes = '\n'.join([notes, book.notes])
-                book.save()
-
-                if not book.authors.exists():
-                    first_name, last_name = parse_name(item['fields']['author'])
-                    author, created = Author.objects.get_or_create(first_name=first_name, last_name=last_name)
-                    if not author.slug:
-                        author.slug = slugify(author_name)
+
+        for pass_n in (1, 2):
+            for item in data:
+                if item["model"] == "pdcounter.bookstub":
+                    if pass_n != 2:
+                        continue
+                    notes = []
+                    print(item["fields"]["author"], item["fields"]["title"])
+                    slug = item["fields"]["slug"]
+                    book, created = Book.objects.get_or_create(slug=slug)
+                    if item["fields"]["translator"] and not book.translators.exists():
+                        notes.append("tłum.: " + item["fields"]["translator"])
+                    book.title = book.title or item["fields"]["title"]
+                    book.pd_year = book.pd_year or item["fields"]["pd"]
+                    notes = "\n".join(notes)
+                    if notes and notes not in book.notes:
+                        book.notes = "\n".join([notes, book.notes])
+                    book.save()
+
+                    if not book.authors.exists():
+                        first_name, last_name = parse_name(item["fields"]["author"])
+                        author_slug = slugify(item["fields"]["author"])
+                        author = (
+                            Author.objects.filter(slug=author_slug).first()
+                            or Author.objects.filter(
+                                first_name=first_name, last_name=last_name
+                            ).first()
+                            or Author()
+                        )
+                        author.slug = author.slug or author_slug
+                        author.first_name = author.first_name or first_name
+                        author.last_name = author.last_name or last_name
+                        author.save()
+                        book.authors.set([author])
+                elif item["model"] == "pdcounter.author":
+                    if pass_n != 1:
+                        continue
+                    slug = item["fields"]["slug"]
+                    author, created = Author.objects.get_or_create(slug=slug)
+                    if not author.first_name and not author.last_name:
+                        author.first_name, author.last_name = parse_name(
+                            item["fields"]["name"]
+                        )
+                        author.year_of_death = (
+                            author.year_of_death or item["fields"]["death"]
+                        )
+                        author.notes = author.notes or item["fields"]["description"]
+                        author.gazeta_link = (
+                            author.gazeta_link or item["fields"]["gazeta_link"]
+                        )
                         author.save()
                         author.save()
-                    book.authors.set([author])
-            elif item['model'] == 'pdcounter.author':
-                slug = item['fields']['slug']
-                author, created = Author.objects.get_or_create(slug=slug)
-                if not author.first_name and not author.last_name:
-                    author.first_name, author.last_name = parse_name(item['fields']['name'])
-                    author.year_of_death = author.year_of_death or item['fields']['death']
-                    author.notes = author.notes or item['fields']['description']
-                    author.gazeta_link = author.gazeta_link or item['fields']['gazeta_link']
+                        wiki_link = item["fields"]["wiki_link"]
+                        assert not wiki_link  # Welp
+                elif item["model"] == "catalogue.book":
+                    if pass_n != 2:
+                        continue
+                    if item["fields"]["parent"]:
+                        continue
+                    print(item["fields"]["slug"])
+                    slug = item["fields"]["slug"]
+                    book, created = Book.objects.get_or_create(slug=slug)
+                    book.title = book.title or item["fields"]["title"]
+                    book.language = book.language or item["fields"]["language"]
+                    book.gazeta_link = book.gazeta_link or item["fields"]["gazeta_link"]
+                    if item["fields"]["wiki_link"]:
+                        book.wikidata = (
+                            book.wikidata
+                            or find_wikidata(item["fields"]["wiki_link"], "pl")
+                            or ""
+                        )
+
+                    extra_info = json.loads(item["fields"]["extra_info"])
+                    if book.pd_year is None and extra_info.get(
+                        "released_to_public_domain_at"
+                    ):
+                        book.pd_year = int(
+                            extra_info["released_to_public_domain_at"].split("-", 1)[0]
+                        )
+
+                    book.save()
+
+                    if not book.authors.exists():
+                        authors = []
+                        for astr in extra_info.get("authors", []):
+                            parts = astr.split(", ")
+                            if len(parts) == 1:
+                                first_name = parts[0]
+                                last_name = ""
+                            else:
+                                last_name, first_name = parts
+                            aslug = slugify(f"{first_name} {last_name}".strip())
+                            author = (
+                                Author.objects.filter(slug=aslug).first()
+                                or Author.objects.filter(
+                                    first_name=first_name, last_name=last_name
+                                ).first()
+                                or Author.objects.filter(name_de=astr).first()
+                                or Author.objects.filter(name_lt=astr).first()
+                            )
+                            # Not trying to create the author or set properties, because here we don't know the dc:creator@xml:lang property.
+                            if author is not None:
+                                authors.append(author)
+                        book.authors.set(authors)
+                elif item["model"] == "catalogue.tag":
+                    if pass_n != 1:
+                        continue
+                    if item["fields"]["category"] != "author":
+                        continue
+                    slug = item["fields"]["slug"]
+                    author, created = Author.objects.get_or_create(slug=slug)
+                    author.name_de = author.name_de or item["fields"]["name_de"] or ""
+                    author.name_lt = author.name_lt or item["fields"]["name_lt"] or ""
+                    if not author.first_name and not author.last_name:
+                        author.first_name, author.last_name = parse_name(
+                            item["fields"]["name_pl"]
+                        )
+                    author.culturepl_link = (
+                        author.culturepl_link or item["fields"]["culturepl_link"] or ""
+                    )
+                    author.gazeta_link = (
+                        author.gazeta_link or item["fields"]["gazeta_link"] or ""
+                    )
+                    author.description = (
+                        author.description or item["fields"]["description_pl"] or ""
+                    )
+                    author.description_de = (
+                        author.description_de or item["fields"]["description_de"] or ""
+                    )
+                    author.description_lt = (
+                        author.description_lt or item["fields"]["description_lt"] or ""
+                    )
+
+                    if not author.wikidata:
+                        for field, value in item["fields"].items():
+                            if field.startswith("wiki_link_") and value:
+                                wd = find_wikidata(value, field.rsplit("_", 1)[-1])
+                                if wd:
+                                    author.wikidata = wd
+                                    break
                     author.save()
                     author.save()
-                    wiki_link = item['fields']['wiki_link']
-                    assert not wiki_link # Welp
-            else:
-                print(item)
-                break
 
 
+                else:
+                    print(item)
+                    break
diff --git a/src/catalogue/migrations/0014_book_gazeta_link.py b/src/catalogue/migrations/0014_book_gazeta_link.py
new file mode 100644 (file)
index 0000000..4b0a496
--- /dev/null
@@ -0,0 +1,18 @@
+# Generated by Django 3.0.4 on 2020-04-15 22:12
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalogue', '0013_auto_20200415_1755'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='book',
+            name='gazeta_link',
+            field=models.CharField(blank=True, max_length=255),
+        ),
+    ]
diff --git a/src/catalogue/migrations/0015_auto_20200416_1120.py b/src/catalogue/migrations/0015_auto_20200416_1120.py
new file mode 100644 (file)
index 0000000..996e5a6
--- /dev/null
@@ -0,0 +1,23 @@
+# Generated by Django 3.0.4 on 2020-04-16 11:20
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalogue', '0014_book_gazeta_link'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='author',
+            name='name_de',
+            field=models.CharField(blank=True, max_length=255),
+        ),
+        migrations.AddField(
+            model_name='author',
+            name='name_lt',
+            field=models.CharField(blank=True, max_length=255),
+        ),
+    ]
diff --git a/src/catalogue/migrations/0016_auto_20200417_1421.py b/src/catalogue/migrations/0016_auto_20200417_1421.py
new file mode 100644 (file)
index 0000000..ca4b714
--- /dev/null
@@ -0,0 +1,23 @@
+# Generated by Django 3.0.4 on 2020-04-17 14:21
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalogue', '0015_auto_20200416_1120'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='author',
+            name='description_de',
+            field=models.TextField(blank=True),
+        ),
+        migrations.AddField(
+            model_name='author',
+            name='description_lt',
+            field=models.TextField(blank=True),
+        ),
+    ]
diff --git a/src/catalogue/migrations/0017_auto_20200417_1638.py b/src/catalogue/migrations/0017_auto_20200417_1638.py
new file mode 100644 (file)
index 0000000..c4a983a
--- /dev/null
@@ -0,0 +1,25 @@
+# Generated by Django 3.0.4 on 2020-04-17 16:38
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('catalogue', '0016_auto_20200417_1421'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='author',
+            name='wikidata',
+            field=models.CharField(blank=True, default='', help_text='If you have a Wikidata ID, like "Q1337", enter it and save.', max_length=255),
+            preserve_default=False,
+        ),
+        migrations.AlterField(
+            model_name='book',
+            name='wikidata',
+            field=models.CharField(blank=True, default='', help_text='If you have a Wikidata ID, like "Q1337", enter it and save.', max_length=255),
+            preserve_default=False,
+        ),
+    ]
index e41ba35..b45cefe 100644 (file)
@@ -8,6 +8,10 @@ class Author(WikidataMixin, models.Model):
     slug = models.SlugField(null=True, blank=True, unique=True)
     first_name = models.CharField(max_length=255, blank=True)
     last_name = models.CharField(max_length=255, blank=True)
     slug = models.SlugField(null=True, blank=True, unique=True)
     first_name = models.CharField(max_length=255, blank=True)
     last_name = models.CharField(max_length=255, blank=True)
+
+    name_de = models.CharField(max_length=255, blank=True)
+    name_lt = models.CharField(max_length=255, blank=True)
+
     year_of_death = models.SmallIntegerField(null=True, blank=True)
     status = models.PositiveSmallIntegerField(
         null=True,
     year_of_death = models.SmallIntegerField(null=True, blank=True)
     status = models.PositiveSmallIntegerField(
         null=True,
@@ -22,14 +26,18 @@ class Author(WikidataMixin, models.Model):
     notes = models.TextField(blank=True)
     gazeta_link = models.CharField(max_length=255, blank=True)
     culturepl_link = models.CharField(max_length=255, blank=True)
     notes = models.TextField(blank=True)
     gazeta_link = models.CharField(max_length=255, blank=True)
     culturepl_link = models.CharField(max_length=255, blank=True)
+
     description = models.TextField(blank=True)
     description = models.TextField(blank=True)
+    description_de = models.TextField(blank=True)
+    description_lt = models.TextField(blank=True)
+
     priority = models.PositiveSmallIntegerField(
         default=0, choices=[(0, _("Low")), (1, _("Medium")), (2, _("High"))]
     )
     priority = models.PositiveSmallIntegerField(
         default=0, choices=[(0, _("Low")), (1, _("Medium")), (2, _("High"))]
     )
-    collections = models.ManyToManyField('Collection', blank=True)
+    collections = models.ManyToManyField("Collection", blank=True)
 
     class Meta:
 
     class Meta:
-        ordering = ('last_name', 'first_name', 'year_of_death')
+        ordering = ("last_name", "first_name", "year_of_death")
 
     class Wikidata:
         first_name = WIKIDATA.GIVEN_NAME
 
     class Wikidata:
         first_name = WIKIDATA.GIVEN_NAME
@@ -62,10 +70,11 @@ class Book(WikidataMixin, models.Model):
         default=0, choices=[(0, _("Low")), (1, _("Medium")), (2, _("High"))]
     )
     pd_year = models.IntegerField(null=True, blank=True)
         default=0, choices=[(0, _("Low")), (1, _("Medium")), (2, _("High"))]
     )
     pd_year = models.IntegerField(null=True, blank=True)
-    collections = models.ManyToManyField('Collection', blank=True)
+    gazeta_link = models.CharField(max_length=255, blank=True)
+    collections = models.ManyToManyField("Collection", blank=True)
 
     class Meta:
 
     class Meta:
-        ordering = ('title',)
+        ordering = ("title",)
 
     class Wikidata:
         authors = WIKIDATA.AUTHOR
 
     class Wikidata:
         authors = WIKIDATA.AUTHOR
@@ -79,17 +88,17 @@ class Book(WikidataMixin, models.Model):
         txt = self.title
         astr = self.authors_str()
         if astr:
         txt = self.title
         astr = self.authors_str()
         if astr:
-            txt = f'{astr} – {txt}'
+            txt = f"{astr} – {txt}"
         tstr = self.translators_str()
         if tstr:
         tstr = self.translators_str()
         if tstr:
-            txt = f'{txt} (tłum. {tstr})'
+            txt = f"{txt} (tłum. {tstr})"
         return txt
 
     def authors_str(self):
         return txt
 
     def authors_str(self):
-        return ', '.join(str(author) for author in self.authors.all())
+        return ", ".join(str(author) for author in self.authors.all())
 
     def translators_str(self):
 
     def translators_str(self):
-        return ', '.join(str(author) for author in self.translators.all())
+        return ", ".join(str(author) for author in self.translators.all())
 
 
 class Collection(models.Model):
 
 
 class Collection(models.Model):
index c688570..88686b6 100644 (file)
@@ -11,9 +11,7 @@ from wikidata.datavalue import DatavalueError
 class WikidataMixin(models.Model):
     wikidata = models.CharField(
         max_length=255,
 class WikidataMixin(models.Model):
     wikidata = models.CharField(
         max_length=255,
-        null=True,
         blank=True,
         blank=True,
-        unique=True,
         help_text=_('If you have a Wikidata ID, like "Q1337", enter it and save.'),
     )
 
         help_text=_('If you have a Wikidata ID, like "Q1337", enter it and save.'),
     )
 
@@ -89,7 +87,11 @@ class WikidataAdminMixin:
 
     def wikidata_link(self, obj):
         if obj.wikidata:
 
     def wikidata_link(self, obj):
         if obj.wikidata:
-            return format_html('<a href="https://www.wikidata.org/wiki/{wd}" target="_blank">{wd}</a>', wd=obj.wikidata)
+            return format_html(
+                '<a href="https://www.wikidata.org/wiki/{wd}" target="_blank">{wd}</a>',
+                wd=obj.wikidata,
+            )
         else:
         else:
-            return ''
-    wikidata_link.admin_order_field = 'wikidata'
+            return ""
+
+    wikidata_link.admin_order_field = "wikidata"