# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
"""Helpers that fetch image metadata (title, author, licence, download URL)
from Flickr, Wikimedia Commons and cyfrowe.mnw.art.pl."""
from contextlib import closing
import csv
from io import StringIO
import json
import re
from urllib.parse import quote, unquote_to_bytes
from urllib.request import FancyURLopener

from wikidata.client import Client

from catalogue.constants import WIKIDATA


# FancyURLopener is a legacy urllib API (deprecated since Python 3.3, removed
# in 3.14).  It is kept because this public class derives from it.
class URLOpener(FancyURLopener):
    """URL opener that reports a fixed ``version`` string.

    urllib's legacy openers send ``version`` as the User-Agent header.
    """

    @property
    def version(self):
        return 'FNP Redakcja'


# NOTE(review): this module text was recovered from a whitespace-collapsed
# diff.  The diff elides the body of ``FlickrError`` and everything but the
# tail of ``get_flickr_data``; ``get_import_data`` below still calls
# ``get_flickr_data``, so both must be supplied by the full module.


def get_wikimedia_data(url):
    """Fetch metadata for a Wikimedia Commons file.

    The file name is taken from the last path segment of *url* (with any
    ``File:``-style prefix dropped) and looked up through the Commons API
    (``prop=imageinfo`` with ``extmetadata``).

    Returns a dict with the keys ``title``, ``author``, ``source_url``,
    ``download_url``, ``license_url`` and ``license_name``.

    When the object name contains a link to a Wikidata item, the title is
    replaced with that item's label (Polish if available).  The author is
    replaced likewise, but only if the item has a value for the
    ``WIKIDATA.CREATOR`` property; otherwise the uploader's user name is kept.
    A licence named "Public domain" is reported under its Polish name and URL.

    The examples below need network access.

    >>> get_wikimedia_data('https://commons.wikimedia.org/wiki/File:Valdai_IverskyMon_asv2018_img47.jpg')
    {'title': 'Valdai IverskyMon asv2018 img47', 'author': 'A.Savin', 'source_url': 'https://commons.wikimedia.org/wiki/File:Valdai_IverskyMon_asv2018_img47.jpg', 'download_url': 'https://upload.wikimedia.org/wikipedia/commons/4/43/Valdai_IverskyMon_asv2018_img47.jpg', 'license_url': 'http://artlibre.org/licence/lal/en', 'license_name': 'FAL'}

    >>> get_wikimedia_data('https://commons.wikimedia.org/wiki/File:Pymonenko_A_boy_in_a_straw_hat.jpg')
    {'title': 'Chłopiec w słomkowym kapeluszu', 'author': 'Mykola Pymonenko', 'source_url': 'https://commons.wikimedia.org/wiki/File:Pymonenko_A_boy_in_a_straw_hat.jpg', 'download_url': 'https://upload.wikimedia.org/wikipedia/commons/9/9b/Pymonenko_A_boy_in_a_straw_hat.jpg', 'license_url': 'https://pl.wikipedia.org/wiki/Domena_publiczna', 'license_name': 'domena publiczna'}

    """
    file_name = url.rsplit('/', 1)[-1].rsplit(':', 1)[-1]
    # The name comes out of a URL, so it may or may not be percent-encoded
    # already.  Decode to raw bytes and encode again: already-escaped names
    # are not double-escaped, while literal '&', '+', '#' etc. can no longer
    # corrupt the query string.
    api_url = (
        'https://commons.wikimedia.org/w/api.php?action=query'
        '&titles=File:{}&prop=imageinfo&iiprop=url|user|extmetadata'
        '&iimetadataversion=latest&format=json'
    ).format(quote(unquote_to_bytes(file_name), safe=''))

    # Close the HTTP response deterministically instead of leaking it.
    with closing(URLOpener().open(api_url)) as response:
        payload = json.loads(response.read().decode('utf-8'))

    # A single title was requested, so the first page is the only one.
    info = list(payload['query']['pages'].values())[0]['imageinfo'][0]
    ext = info['extmetadata']

    meta = {
        'title': ext['ObjectName']['value'],
        'author': info['user'],
        'source_url': info['descriptionurl'],
        'download_url': info['url'],
        'license_url': ext.get('LicenseUrl', {}).get('value', ''),
        'license_name': ext['LicenseShortName']['value'],
    }

    # There are Wikidata links in ObjectName sometimes. Let's use it.
    wikidata_match = re.search(r'wikidata\.org/wiki/(Q\d+)', meta['title'])
    if wikidata_match is not None:
        client = Client()
        entity = client.get(wikidata_match.group(1))
        meta['title'] = entity.label.get('pl', str(entity.label))
        # Not every item has a creator; Mapping.get() then yields None, so
        # keep the uploader's name rather than failing on ``None.label``.
        creator = entity.get(client.get(WIKIDATA.CREATOR))
        if creator is not None:
            meta['author'] = creator.label.get('pl', str(creator.label))

    if meta['license_name'] == 'Public domain':
        meta['license_name'] = 'domena publiczna'
        meta['license_url'] = 'https://pl.wikipedia.org/wiki/Domena_publiczna'

    return meta


def get_mnw_data(url):
    """Fetch metadata for a cyfrowe.mnw.art.pl catalogue object.

    The object number is the last path segment of *url*; the object's CSV
    export is downloaded from cyfrowe-api.mnw.art.pl and its first row is
    used.

    Returns a dict with the keys ``title``, ``author`` (all
    ``authors.<n>.name`` columns joined with ", "), ``source_url`` (*url* as
    given), ``download_url``, ``license_url`` and ``license_name``.  A licence
    named "DOMENA PUBLICZNA" is reported under the same Polish name and URL
    that ``get_wikimedia_data`` uses for public-domain works.

    Raises IndexError if the CSV export contains no data row.

    The example below needs network access.

    >>> get_mnw_data('https://cyfrowe.mnw.art.pl/pl/katalog/511078')
    {'title': 'Chłopka (Baba ukraińska)', 'author': 'Krzyżanowski, Konrad (1872-1922)', 'source_url': 'https://cyfrowe.mnw.art.pl/pl/katalog/511078', 'download_url': 'https://cyfrowe-cdn.mnw.art.pl/upload/multimedia/32/60/3260ae1704cc530cc62befa9b7d58cbd.jpg', 'license_url': 'https://pl.wikipedia.org/wiki/Domena_publiczna', 'license_name': 'domena publiczna'}
    """
    nr = url.rsplit('/', 1)[-1]
    api_url = 'https://cyfrowe-api.mnw.art.pl/api/object/{}/csv'.format(nr)

    # Close the HTTP response deterministically instead of leaking it.
    with closing(URLOpener().open(api_url)) as response:
        text = response.read().decode('utf-8')

    record = list(csv.DictReader(StringIO(text)))[0]

    # Authors are flattened into numbered columns: authors.0.name,
    # authors.1.name, ...  Collect them until the first missing index.
    authors = []
    i = 0
    while f'authors.{i}.name' in record:
        authors.append(record[f'authors.{i}.name'])
        i += 1

    license_url = record['copyrights.0.link']
    license_name = record['copyrights.0.name']
    if license_name == 'DOMENA PUBLICZNA':
        license_name = 'domena publiczna'
        license_url = 'https://pl.wikipedia.org/wiki/Domena_publiczna'

    return {
        'title': record['title'],
        'author': ', '.join(authors),
        'source_url': url,
        'download_url': 'https://cyfrowe-cdn.mnw.art.pl/upload/multimedia/{}.{}'.format(
            record['image.filePath'],
            record['image.extension'],
        ),
        'license_url': license_url,
        'license_name': license_name,
    }


def get_import_data(url):
    """Dispatch *url* to the importer for the site it points to.

    Recognises Flickr, Wikimedia (commons/upload) and cyfrowe.mnw.art.pl
    addresses, with or without the ``http(s)://`` scheme.

    Returns the metadata dict produced by the matching importer, or None when
    *url* belongs to none of the supported sites.
    """
    if re.match(r'(https?://)?(www\.|secure\.)?flickr\.com/', url):
        return get_flickr_data(url)
    if re.match(r'(https?://)?(commons|upload)\.wikimedia\.org/', url):
        return get_wikimedia_data(url)
    if re.match(r'(https?://)?cyfrowe\.mnw\.art\.pl/', url):
        return get_mnw_data(url)
    # Unsupported site: make the "no importer" result explicit.
    return None