From feee71de06ace01f793c2c8adf9cce65b9df2cec Mon Sep 17 00:00:00 2001 From: Jan Szejko Date: Wed, 26 Apr 2017 20:33:07 +0200 Subject: [PATCH] refreshing flickr cover images + some cleanup --- apps/cover/forms.py | 65 ++++--------------- apps/cover/management/__init__.py | 4 ++ apps/cover/management/commands/__init__.py | 4 ++ .../management/commands/refresh_covers.py | 34 ++++++++++ apps/cover/models.py | 18 +++-- apps/cover/utils.py | 52 +++++++++++++++ apps/cover/views.py | 3 - 7 files changed, 119 insertions(+), 61 deletions(-) create mode 100644 apps/cover/management/__init__.py create mode 100644 apps/cover/management/commands/__init__.py create mode 100644 apps/cover/management/commands/refresh_covers.py diff --git a/apps/cover/forms.py b/apps/cover/forms.py index e4c949c8..5cc316b2 100755 --- a/apps/cover/forms.py +++ b/apps/cover/forms.py @@ -3,14 +3,14 @@ # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import json -import re -from urllib2 import urlopen from django import forms from django.utils.translation import ugettext_lazy as _, ugettext from cover.models import Image from django.utils.text import mark_safe +from cover.utils import get_flickr_data, FlickrError + + class ImageAddForm(forms.ModelForm): class Meta: model = Image @@ -28,8 +28,8 @@ class ImageAddForm(forms.ModelForm): pass else: raise forms.ValidationError(mark_safe( - ugettext('Image already in repository.' - ) % {'url': img.get_absolute_url()})) + ugettext('Image already in repository.') + % {'url': img.get_absolute_url()})) return cl def clean(self): @@ -38,6 +38,7 @@ class ImageAddForm(forms.ModelForm): raise forms.ValidationError('No image specified') return cleaned_data + class ImageEditForm(forms.ModelForm): """Form used for editing a Book.""" class Meta: @@ -49,61 +50,23 @@ class ReadonlyImageEditForm(ImageEditForm): """Form used for not editing an Image.""" def __init__(self, *args, **kwargs): - ret = super(ReadonlyImageEditForm, self).__init__(*args, **kwargs) + super(ReadonlyImageEditForm, self).__init__(*args, **kwargs) for field in self.fields.values(): field.widget.attrs.update({"readonly": True}) - return ret def save(self, *args, **kwargs): - raise AssertionError, "ReadonlyImageEditForm should not be saved." + raise AssertionError("ReadonlyImageEditForm should not be saved.") class FlickrForm(forms.Form): source_url = forms.URLField(label=_('Flickr URL')) def clean_source_url(self): - def normalize_html(html): - return html - return re.sub('[\t\n]', '', html) - url = self.cleaned_data['source_url'] - m = re.match(r'(https?://)?(www\.|secure\.)?flickr\.com/photos/(?P[^/]+)/(?P\d+)/?', url) - if not m: - raise forms.ValidationError("It doesn't look like Flickr URL.") - author_slug, img_id = m.group('author'), m.group('img') - base_url = "https://www.flickr.com/photos/%s/%s/" % (author_slug, img_id) - - try: - html = normalize_html(urlopen(url).read().decode('utf-8')) - except: - raise forms.ValidationError('Error reading page.') - match = re.search(r']* rel="license ', html) - try: - assert match - license_url = match.group(1) - self.cleaned_data['license_url'] = license_url - re_license = re.compile(r'https?://creativecommons.org/licenses/([^/]*)/([^/]*)/.*') - m = re_license.match(license_url) - assert m - self.cleaned_data['license_name'] = 'CC %s %s' % (m.group(1).upper(), m.group(2)) - except AssertionError: - raise forms.ValidationError('Error reading license name.') - - m = re.search(r']* class="owner-name [^>]*>([^<]*)<', html) - if m: - self.cleaned_data['author'] = "%s@Flickr" % m.group(1) - else: - raise forms.ValidationError('Error reading author name.') - - m = re.search(r']*>(.*?)', html, re.S) - if not m: - raise forms.ValidationError('Error reading image title.') - self.cleaned_data['title'] = m.group(1).strip() - - m = re.search(r'modelExport: (\{.*\})', html) try: - assert m - self.cleaned_data['download_url'] = 'https:' + json.loads(m.group(1))['photo-models'][0]['sizes']['o']['url'] - except (AssertionError, ValueError, IndexError, KeyError): - raise forms.ValidationError('Error reading image URL.') - return base_url + flickr_data = get_flickr_data(url) + except FlickrError as e: + raise forms.ValidationError(e) + for field_name in ('license_url', 'license_name', 'author', 'title', 'download_url'): + self.cleaned_data[field_name] = flickr_data[field_name] + return flickr_data['source_url'] diff --git a/apps/cover/management/__init__.py b/apps/cover/management/__init__.py new file mode 100644 index 00000000..d3841244 --- /dev/null +++ b/apps/cover/management/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# diff --git a/apps/cover/management/commands/__init__.py b/apps/cover/management/commands/__init__.py new file mode 100644 index 00000000..d3841244 --- /dev/null +++ b/apps/cover/management/commands/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# diff --git a/apps/cover/management/commands/refresh_covers.py b/apps/cover/management/commands/refresh_covers.py new file mode 100644 index 00000000..59c68d2a --- /dev/null +++ b/apps/cover/management/commands/refresh_covers.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# +# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. +# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. +# +import urllib2 as urllib + +from django.core.files.base import ContentFile +from django.core.management.base import NoArgsCommand + +from cover.models import Image +from cover.utils import get_flickr_data, URLOpener, FlickrError + + +class Command(NoArgsCommand): + def handle_noargs(self, **options): + for image in Image.objects.exclude(book=None).order_by('id'): + print image.id + if 'flickr.com' in image.source_url: + try: + flickr_data = get_flickr_data(image.source_url) + except FlickrError as e: + print 'Flickr analysis failed: %s' % e + else: + try: + t = URLOpener().open(image.download_url).read() + except urllib.URLError: + print 'Broken download url' + except IOError: + print 'Connection failed' + else: + image.download_url = flickr_data['download_url'] + image.file.save(image.file.name, ContentFile(t)) + image.save() diff --git a/apps/cover/models.py b/apps/cover/models.py index e9296889..d83dad39 100644 --- a/apps/cover/models.py +++ b/apps/cover/models.py @@ -3,10 +3,8 @@ # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -import re -from urlparse import urljoin -from django.conf import settings from django.core.files.base import ContentFile +from django.core.files.storage import FileSystemStorage from django.db import models from django.db.models.signals import post_save from django.dispatch import receiver @@ -15,6 +13,13 @@ from django.contrib.sites.models import Site from cover.utils import URLOpener +class OverwriteStorage(FileSystemStorage): + + def get_available_name(self, name, max_length=None): + self.delete(name) + return name + + class Image(models.Model): title = models.CharField(max_length=255, verbose_name=_('title')) author = models.CharField(max_length=255, verbose_name=_('author')) @@ -22,7 +27,8 @@ class Image(models.Model): license_url = models.URLField(max_length=255, blank=True, verbose_name=_('license URL')) source_url = models.URLField(verbose_name=_('source URL'), null=True, blank=True) download_url = models.URLField(unique=True, verbose_name=_('image download URL'), null=True, blank=True) - file = models.ImageField(upload_to='cover/image', editable=True, verbose_name=_('file')) + file = models.ImageField( + upload_to='cover/image', storage=OverwriteStorage(), editable=True, verbose_name=_('file')) class Meta: verbose_name = _('cover image') @@ -33,7 +39,7 @@ class Image(models.Model): @models.permalink def get_absolute_url(self): - return ('cover_image', [self.id]) + return 'cover_image', [self.id] def get_full_url(self): return "http://%s%s" % (Site.objects.get_current().domain, self.get_absolute_url()) @@ -44,5 +50,3 @@ def download_image(sender, instance, **kwargs): if instance.pk and not instance.file: t = URLOpener().open(instance.download_url).read() instance.file.save("%d.jpg" % instance.pk, ContentFile(t)) - - diff --git a/apps/cover/utils.py b/apps/cover/utils.py index e22fa727..80ee0b7e 100755 --- a/apps/cover/utils.py +++ b/apps/cover/utils.py @@ -3,7 +3,10 @@ # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +import json +import re from urllib import FancyURLopener + from django.contrib.sites.models import Site @@ -11,3 +14,52 @@ class URLOpener(FancyURLopener): @property def version(self): return 'FNP Redakcja (http://%s)' % Site.objects.get_current() + + +class FlickrError(Exception): + pass + + +def get_flickr_data(url): + m = re.match(r'(https?://)?(www\.|secure\.)?flickr\.com/photos/(?P[^/]+)/(?P\d+)/?', url) + if not m: + raise FlickrError("It doesn't look like Flickr URL.") + author_slug, img_id = m.group('author'), m.group('img') + base_url = "https://www.flickr.com/photos/%s/%s/" % (author_slug, img_id) + try: + html = URLOpener().open(url).read().decode('utf-8') + except IOError: + raise FlickrError('Error reading page') + match = re.search(r']* rel="license ', html) + if not match: + raise FlickrError('License not found.') + else: + license_url = match.group(1) + re_license = re.compile(r'https?://creativecommons.org/licenses/([^/]*)/([^/]*)/.*') + m = re_license.match(license_url) + if not m: + raise FlickrError('License does not look like CC: %s' % license_url) + license_name = 'CC %s %s' % (m.group(1).upper(), m.group(2)) + m = re.search(r']* class="owner-name [^>]*>([^<]*)<', html) + if m: + author = "%s@Flickr" % m.group(1) + else: + raise FlickrError('Error reading author name.') + m = re.search(r']*>(.*?)', html, re.S) + if not m: + raise FlickrError('Error reading image title.') + title = m.group(1).strip() + m = re.search(r'modelExport: (\{.*\})', html) + try: + assert m + download_url = 'https:' + json.loads(m.group(1))['photo-models'][0]['sizes']['o']['url'] + except (AssertionError, ValueError, IndexError, KeyError): + raise FlickrError('Error reading image URL.') + return { + 'source_url': base_url, + 'license_url': license_url, + 'license_name': license_name, + 'author': author, + 'title': title, + 'download_url': download_url, + } diff --git a/apps/cover/views.py b/apps/cover/views.py index 4a6e575b..607d8088 100644 --- a/apps/cover/views.py +++ b/apps/cover/views.py @@ -15,7 +15,6 @@ from catalogue.models import Chunk from cover.models import Image from cover import forms - PREVIEW_SIZE = (216, 300) @@ -107,8 +106,6 @@ def image(request, pk): @active_tab('cover') def image_list(request): - objects = Image.objects.all() - enable_add = request.user.has_perm('cover.add_image') return render(request, "cover/image_list.html", { 'object_list': Image.objects.all(), 'can_add': request.user.has_perm('cover.add_image'), -- 2.20.1