2 from urllib.request import urlopen
4 from django.core.management import BaseCommand
5 from slugify import slugify
7 from catalogue.models import Book, Author
11 name_pieces = name.rsplit(" ", 1)
12 if len(name_pieces) == 1:
13 return name_pieces[0], ""
# Look up a Wikidata entity ID for a wiki article link.
#
# link: address of the wiki article.
# lang: wiki language code; it is interpolated into the `sites={lang}wiki`
#       parameter of the wbgetentities request URL below.
#
# NOTE(review): this view of the function has gaps -- the call that wraps the
# URL and whatever follows the final `if` are not visible -- so the return
# contract is not documented here. Callers in this file use the result as a
# value (`... or find_wikidata(...)`, `wd = find_wikidata(...)`); confirm
# against the full file.
18 def find_wikidata(link, lang):
# Take the last "/"-separated segment of the link as the page title.
20 title = link.rsplit("/", 1)[-1]
# NOTE(review): this splits `link`, not `title`, so the assignment above is
# overwritten and `title` becomes the whole link up to any "#fragment".
# Probably meant `title.split("#", 1)[0]` -- confirm and fix.
21 title = link.split("#", 1)[0]
# Replace spaces with underscores in the title.
22 title = title.replace(" ", "_")
# wbgetentities query for `title` on the {lang}wiki site, JSON format.
# NOTE(review): `title` is interpolated without URL-encoding -- confirm that
# non-ASCII or reserved characters in titles are handled.
25 f"https://www.wikidata.org/w/api.php?action=wbgetentities&sites={lang}wiki&titles={title}&format=json"
# Take the first key of the "entities" mapping in the response data.
28 wikidata_id = list(data["entities"].keys())[0]
# Keys that do not start with "Q" are handled separately.
# NOTE(review): the handling itself is not visible here; presumably this is the
# "no matching entity" case -- confirm.
29 if not wikidata_id.startswith("Q"):
34 class Command(BaseCommand):
# Declare the command's single positional command-line argument, "path".
# handle() in this class takes a parameter of the same name.
# NOTE(review): presumably the file the command imports from; the code that
# reads it is not visible in this view -- confirm.
35 def add_arguments(self, parser):
36 parser.add_argument("path")
38 def handle(self, path, **kwargs):
44 if item["model"] == "pdcounter.bookstub":
48 print(item["fields"]["author"], item["fields"]["title"])
49 slug = item["fields"]["slug"]
50 book, created = Book.objects.get_or_create(slug=slug)
51 if item["fields"]["translator"] and not book.translators.exists():
52 notes.append("tłum.: " + item["fields"]["translator"])
53 book.title = book.title or item["fields"]["title"]
54 book.pd_year = book.pd_year or item["fields"]["pd"]
55 notes = "\n".join(notes)
56 if notes and notes not in book.notes:
57 book.notes = "\n".join([notes, book.notes])
60 if not book.authors.exists():
61 first_name, last_name = parse_name(item["fields"]["author"])
62 author_slug = slugify(item["fields"]["author"])
64 Author.objects.filter(slug=author_slug).first()
65 or Author.objects.filter(
66 first_name=first_name, last_name=last_name
70 author.slug = author.slug or author_slug
71 author.first_name = author.first_name or first_name
72 author.last_name = author.last_name or last_name
74 book.authors.set([author])
75 elif item["model"] == "pdcounter.author":
78 slug = item["fields"]["slug"]
79 author, created = Author.objects.get_or_create(slug=slug)
80 if not author.first_name and not author.last_name:
81 author.first_name, author.last_name = parse_name(
82 item["fields"]["name"]
84 author.year_of_death = (
85 author.year_of_death or item["fields"]["death"]
87 author.notes = author.notes or item["fields"]["description"]
88 author.gazeta_link = (
89 author.gazeta_link or item["fields"]["gazeta_link"]
92 wiki_link = item["fields"]["wiki_link"]
93 assert not wiki_link # Welp
94 elif item["model"] == "catalogue.book":
97 if item["fields"]["parent"]:
99 print(item["fields"]["slug"])
100 slug = item["fields"]["slug"]
101 book, created = Book.objects.get_or_create(slug=slug)
102 book.title = book.title or item["fields"]["title"]
103 book.language = book.language or item["fields"]["language"]
104 book.gazeta_link = book.gazeta_link or item["fields"]["gazeta_link"]
105 if item["fields"]["wiki_link"]:
108 or find_wikidata(item["fields"]["wiki_link"], "pl")
112 extra_info = json.loads(item["fields"]["extra_info"])
113 if book.pd_year is None and extra_info.get(
114 "released_to_public_domain_at"
117 extra_info["released_to_public_domain_at"].split("-", 1)[0]
122 if not book.authors.exists():
124 for astr in extra_info.get("authors", []):
125 parts = astr.split(", ")
127 first_name = parts[0]
130 last_name, first_name = parts
131 aslug = slugify(f"{first_name} {last_name}".strip())
133 Author.objects.filter(slug=aslug).first()
134 or Author.objects.filter(
135 first_name=first_name, last_name=last_name
137 or Author.objects.filter(name_de=astr).first()
138 or Author.objects.filter(name_lt=astr).first()
140 # Not trying to create the author or set properties, because here we don't know the dc:creator@xml:lang property.
141 if author is not None:
142 authors.append(author)
143 book.authors.set(authors)
144 elif item["model"] == "catalogue.tag":
147 if item["fields"]["category"] != "author":
149 slug = item["fields"]["slug"]
150 author, created = Author.objects.get_or_create(slug=slug)
151 author.name_de = author.name_de or item["fields"]["name_de"] or ""
152 author.name_lt = author.name_lt or item["fields"]["name_lt"] or ""
153 if not author.first_name and not author.last_name:
154 author.first_name, author.last_name = parse_name(
155 item["fields"]["name_pl"]
157 author.culturepl_link = (
158 author.culturepl_link or item["fields"]["culturepl_link"] or ""
160 author.gazeta_link = (
161 author.gazeta_link or item["fields"]["gazeta_link"] or ""
163 author.description = (
164 author.description or item["fields"]["description_pl"] or ""
166 author.description_de = (
167 author.description_de or item["fields"]["description_de"] or ""
169 author.description_lt = (
170 author.description_lt or item["fields"]["description_lt"] or ""
173 if not author.wikidata:
174 for field, value in item["fields"].items():
175 if field.startswith("wiki_link_") and value:
176 wd = find_wikidata(value, field.rsplit("_", 1)[-1])