1 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from urllib.request import urlopen
7 from django.core.management import BaseCommand
8 from slugify import slugify
10 from catalogue.models import Book, Author
14 name_pieces = name.rsplit(" ", 1)
15 if len(name_pieces) == 1:
16 return name_pieces[0], ""
# Look up the Wikidata entity id that corresponds to a Wikipedia article link.
#
# link: address of the article; only the text after the last "/" is used.
# lang: language code that is prefixed to "wiki" to form the `sites` value
#       of the query (e.g. "pl" -> "plwiki").
#
# NOTE(review): the statement that fetches the URL below into `data` and the
# function's return statements are not visible in this excerpt -- confirm the
# exact return contract against the full file.
21 def find_wikidata(link, lang):
# Reduce the link to a page title: keep the last path segment...
23 title = link.rsplit("/", 1)[-1]
# ...drop any "#fragment" suffix...
24 title = title.split("#", 1)[0]
# ...and replace spaces with underscores.
25 title = title.replace(" ", "_")
# wbgetentities request that selects the entity by site + page title and asks
# for a JSON response.
# NOTE(review): `title` is interpolated as-is, without percent-encoding here;
# confirm that titles containing "&", "?" or non-ASCII characters are handled.
28 f"https://www.wikidata.org/w/api.php?action=wbgetentities&sites={lang}wiki&titles={title}&format=json"
# Take the first key of the response's "entities" mapping as the candidate id.
31 wikidata_id = list(data["entities"].keys())[0]
# Keys that do not start with "Q" get separate treatment; presumably this
# catches the placeholder key the API returns for an unknown title -- TODO
# confirm against the wbgetentities documentation.
32 if not wikidata_id.startswith("Q"):
37 class Command(BaseCommand):
def add_arguments(self, parser):
    """Declare the command-line interface: one required positional ``path``.

    The parsed value arrives in ``handle`` as its ``path`` parameter
    (presumably the location of the dump to import -- confirm against the
    part of ``handle`` that opens it).
    """
    register = parser.add_argument
    register("path")
41 def handle(self, path, **kwargs):
47 if item["model"] == "pdcounter.bookstub":
51 print(item["fields"]["author"], item["fields"]["title"])
52 slug = item["fields"]["slug"]
53 book, created = Book.objects.get_or_create(slug=slug)
54 if item["fields"]["translator"] and not book.translators.exists():
55 notes.append("tłum.: " + item["fields"]["translator"])
56 book.title = book.title or item["fields"]["title"]
57 book.pd_year = book.pd_year or item["fields"]["pd"]
58 notes = "\n".join(notes)
59 if notes and notes not in book.notes:
60 book.notes = "\n".join([notes, book.notes])
63 if not book.authors.exists():
64 first_name, last_name = parse_name(item["fields"]["author"])
65 author_slug = slugify(item["fields"]["author"])
67 Author.objects.filter(slug=author_slug).first()
68 or Author.objects.filter(
69 first_name=first_name, last_name=last_name
73 author.slug = author.slug or author_slug
74 author.first_name = author.first_name or first_name
75 author.last_name = author.last_name or last_name
77 book.authors.set([author])
78 elif item["model"] == "pdcounter.author":
81 slug = item["fields"]["slug"]
82 author, created = Author.objects.get_or_create(slug=slug)
83 if not author.first_name and not author.last_name:
84 author.first_name, author.last_name = parse_name(
85 item["fields"]["name"]
87 author.year_of_death = (
88 author.year_of_death or item["fields"]["death"]
90 author.notes = author.notes or item["fields"]["description"]
91 author.gazeta_link = (
92 author.gazeta_link or item["fields"]["gazeta_link"]
95 wiki_link = item["fields"]["wiki_link"]
96 assert not wiki_link # Welp
97 elif item["model"] == "catalogue.book":
100 if item["fields"]["parent"]:
102 print(item["fields"]["slug"])
103 slug = item["fields"]["slug"]
104 book, created = Book.objects.get_or_create(slug=slug)
105 book.title = book.title or item["fields"]["title"]
106 book.language = book.language or item["fields"]["language"]
107 book.gazeta_link = book.gazeta_link or item["fields"]["gazeta_link"]
108 if item["fields"]["wiki_link"]:
111 or find_wikidata(item["fields"]["wiki_link"], "pl")
115 extra_info = json.loads(item["fields"]["extra_info"])
116 if book.pd_year is None and extra_info.get(
117 "released_to_public_domain_at"
120 extra_info["released_to_public_domain_at"].split("-", 1)[0]
125 if not book.authors.exists():
127 for astr in extra_info.get("authors", []):
128 parts = astr.split(", ")
130 first_name = parts[0]
133 last_name, first_name = parts
134 aslug = slugify(f"{first_name} {last_name}".strip())
136 Author.objects.filter(slug=aslug).first()
137 or Author.objects.filter(
138 first_name=first_name, last_name=last_name
140 or Author.objects.filter(name_de=astr).first()
141 or Author.objects.filter(name_lt=astr).first()
143 # Not trying to create the author or set properties, because here we don't know the dc:creator@xml:lang property.
144 if author is not None:
145 authors.append(author)
146 book.authors.set(authors)
147 elif item["model"] == "catalogue.tag":
150 if item["fields"]["category"] != "author":
152 slug = item["fields"]["slug"]
153 author, created = Author.objects.get_or_create(slug=slug)
154 author.name_de = author.name_de or item["fields"]["name_de"] or ""
155 author.name_lt = author.name_lt or item["fields"]["name_lt"] or ""
156 if not author.first_name and not author.last_name:
157 author.first_name, author.last_name = parse_name(
158 item["fields"]["name_pl"]
160 author.culturepl_link = (
161 author.culturepl_link or item["fields"]["culturepl_link"] or ""
163 author.gazeta_link = (
164 author.gazeta_link or item["fields"]["gazeta_link"] or ""
166 author.description = (
167 author.description or item["fields"]["description_pl"] or ""
169 author.description_de = (
170 author.description_de or item["fields"]["description_de"] or ""
172 author.description_lt = (
173 author.description_lt or item["fields"]["description_lt"] or ""
176 if not author.wikidata:
177 for field, value in item["fields"].items():
178 if field.startswith("wiki_link_") and value:
179 wd = find_wikidata(value, field.rsplit("_", 1)[-1])