From feee71de06ace01f793c2c8adf9cce65b9df2cec Mon Sep 17 00:00:00 2001
From: Jan Szejko <janek37@gmail.com>
Date: Wed, 26 Apr 2017 20:33:07 +0200
Subject: [PATCH] refreshing flickr cover images + some cleanup

---
 apps/cover/forms.py                           | 65 ++++---------------
 apps/cover/management/__init__.py             |  4 ++
 apps/cover/management/commands/__init__.py    |  4 ++
 .../management/commands/refresh_covers.py     | 34 ++++++++++
 apps/cover/models.py                          | 18 +++--
 apps/cover/utils.py                           | 52 +++++++++++++++
 apps/cover/views.py                           |  3 -
 7 files changed, 119 insertions(+), 61 deletions(-)
 create mode 100644 apps/cover/management/__init__.py
 create mode 100644 apps/cover/management/commands/__init__.py
 create mode 100644 apps/cover/management/commands/refresh_covers.py
diff --git a/apps/cover/forms.py b/apps/cover/forms.py
index e4c949c8..5cc316b2 100755
--- a/apps/cover/forms.py
+++ b/apps/cover/forms.py
@@ -3,14 +3,14 @@
 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-import json
-import re
-from urllib2 import urlopen
 from django import forms
 from django.utils.translation import ugettext_lazy as _, ugettext
 from cover.models import Image
 from django.utils.text import mark_safe
 
+from cover.utils import get_flickr_data, FlickrError
+
+
 class ImageAddForm(forms.ModelForm):
     class Meta:
         model = Image
@@ -28,8 +28,8 @@ class ImageAddForm(forms.ModelForm):
                 pass
             else:
                 raise forms.ValidationError(mark_safe(
-                    ugettext('Image <a href="%(url)s">already in repository</a>.'
-                        ) % {'url': img.get_absolute_url()}))
+                    ugettext('Image <a href="%(url)s">already in repository</a>.')
+                    % {'url': img.get_absolute_url()}))
         return cl
 
     def clean(self):
@@ -38,6 +38,7 @@ class ImageAddForm(forms.ModelForm):
             raise forms.ValidationError('No image specified')
         return cleaned_data
 
+
 class ImageEditForm(forms.ModelForm):
     """Form used for editing a Book."""
     class Meta:
@@ -49,61 +50,23 @@ class ReadonlyImageEditForm(ImageEditForm):
     """Form used for not editing an Image."""
 
     def __init__(self, *args, **kwargs):
-        ret = super(ReadonlyImageEditForm, self).__init__(*args, **kwargs)
+        super(ReadonlyImageEditForm, self).__init__(*args, **kwargs)
         for field in self.fields.values():
             field.widget.attrs.update({"readonly": True})
-        return ret
 
     def save(self, *args, **kwargs):
-        raise AssertionError, "ReadonlyImageEditForm should not be saved."
+        raise AssertionError("ReadonlyImageEditForm should not be saved.")
 
 
 class FlickrForm(forms.Form):
     source_url = forms.URLField(label=_('Flickr URL'))
 
     def clean_source_url(self):
-        def normalize_html(html):
-            return html
-            return re.sub('[\t\n]', '', html)
-    
         url = self.cleaned_data['source_url']
-        m = re.match(r'(https?://)?(www\.|secure\.)?flickr\.com/photos/(?P<author>[^/]+)/(?P<img>\d+)/?', url)
-        if not m:
-            raise forms.ValidationError("It doesn't look like Flickr URL.")
-        author_slug, img_id = m.group('author'), m.group('img')
-        base_url = "https://www.flickr.com/photos/%s/%s/" % (author_slug, img_id)
-
-        try:
-            html = normalize_html(urlopen(url).read().decode('utf-8'))
-        except:
-            raise forms.ValidationError('Error reading page.')
-        match = re.search(r'<a href="([^"]*)"[^>]* rel="license ', html)
-        try:
-            assert match
-            license_url = match.group(1)
-            self.cleaned_data['license_url'] = license_url
-            re_license = re.compile(r'https?://creativecommons.org/licenses/([^/]*)/([^/]*)/.*')
-            m = re_license.match(license_url)
-            assert m
-            self.cleaned_data['license_name'] = 'CC %s %s' % (m.group(1).upper(), m.group(2))
-        except AssertionError:
-            raise forms.ValidationError('Error reading license name.')
-
-        m = re.search(r'<a[^>]* class="owner-name [^>]*>([^<]*)<', html)
-        if m:
-            self.cleaned_data['author'] = "%s@Flickr" % m.group(1)
-        else:
-            raise forms.ValidationError('Error reading author name.')
-
-        m = re.search(r'<h1[^>]*>(.*?)</h1>', html, re.S)
-        if not m:
-            raise forms.ValidationError('Error reading image title.')
-        self.cleaned_data['title'] = m.group(1).strip()
-
-        m = re.search(r'modelExport: (\{.*\})', html)
         try:
-            assert m
-            self.cleaned_data['download_url'] = 'https:' + json.loads(m.group(1))['photo-models'][0]['sizes']['o']['url']
-        except (AssertionError, ValueError, IndexError, KeyError):
-            raise forms.ValidationError('Error reading image URL.')
-        return base_url
+            flickr_data = get_flickr_data(url)
+        except FlickrError as e:
+            raise forms.ValidationError(e)
+        for field_name in ('license_url', 'license_name', 'author', 'title', 'download_url'):
+            self.cleaned_data[field_name] = flickr_data[field_name]
+        return flickr_data['source_url']
diff --git a/apps/cover/management/__init__.py b/apps/cover/management/__init__.py
new file mode 100644
index 00000000..d3841244
--- /dev/null
+++ b/apps/cover/management/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
diff --git a/apps/cover/management/commands/__init__.py b/apps/cover/management/commands/__init__.py
new file mode 100644
index 00000000..d3841244
--- /dev/null
+++ b/apps/cover/management/commands/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
diff --git a/apps/cover/management/commands/refresh_covers.py b/apps/cover/management/commands/refresh_covers.py
new file mode 100644
index 00000000..59c68d2a
--- /dev/null
+++ b/apps/cover/management/commands/refresh_covers.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import urllib2 as urllib
+
+from django.core.files.base import ContentFile
+from django.core.management.base import NoArgsCommand
+
+from cover.models import Image
+from cover.utils import get_flickr_data, URLOpener, FlickrError
+
+
+class Command(NoArgsCommand):
+    """Re-download cover images whose source is a Flickr page.
+
+    Each image with a book relation has its Flickr page parsed again and
+    its file fetched from the download URL found there.
+    """
+
+    def handle_noargs(self, **options):
+        """Refresh the stored file and download URL of each Flickr cover."""
+        for image in Image.objects.exclude(book=None).order_by('id'):
+            print image.id
+            # source_url is nullable, so guard before the substring test.
+            if not image.source_url or 'flickr.com' not in image.source_url:
+                continue
+            try:
+                flickr_data = get_flickr_data(image.source_url)
+            except FlickrError as e:
+                print 'Flickr analysis failed: %s' % e
+                continue
+            download_url = flickr_data['download_url']
+            try:
+                # Fetch from the freshly scraped URL; the stored one may be stale.
+                t = URLOpener().open(download_url).read()
+            except urllib.URLError:
+                print 'Broken download url'
+                continue
+            except IOError:
+                print 'Connection failed'
+                continue
+            image.download_url = download_url
+            image.file.save(image.file.name, ContentFile(t))
+            image.save()
diff --git a/apps/cover/models.py b/apps/cover/models.py
index e9296889..d83dad39 100644
--- a/apps/cover/models.py
+++ b/apps/cover/models.py
@@ -3,10 +3,8 @@
 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-import re
-from urlparse import urljoin
-from django.conf import settings
 from django.core.files.base import ContentFile
+from django.core.files.storage import FileSystemStorage
 from django.db import models
 from django.db.models.signals import post_save
 from django.dispatch import receiver
@@ -15,6 +13,13 @@ from django.contrib.sites.models import Site
 from cover.utils import URLOpener
 
 
+class OverwriteStorage(FileSystemStorage):
+    """File storage that frees the requested name by deleting the file already stored under it."""
+    def get_available_name(self, name, max_length=None):
+        self.delete(name)
+        return name
+
+
 class Image(models.Model):
     title = models.CharField(max_length=255, verbose_name=_('title'))
     author = models.CharField(max_length=255, verbose_name=_('author'))
@@ -22,7 +27,8 @@ class Image(models.Model):
     license_url = models.URLField(max_length=255, blank=True, verbose_name=_('license URL'))
     source_url = models.URLField(verbose_name=_('source URL'), null=True, blank=True)
     download_url = models.URLField(unique=True, verbose_name=_('image download URL'), null=True, blank=True)
-    file = models.ImageField(upload_to='cover/image', editable=True, verbose_name=_('file'))
+    file = models.ImageField(
+        upload_to='cover/image', storage=OverwriteStorage(), editable=True, verbose_name=_('file'))
 
     class Meta:
         verbose_name = _('cover image')
@@ -33,7 +39,7 @@ class Image(models.Model):
 
     @models.permalink
     def get_absolute_url(self):
-        return ('cover_image', [self.id])
+        return 'cover_image', [self.id]
 
     def get_full_url(self):
         return "http://%s%s" % (Site.objects.get_current().domain, self.get_absolute_url())
@@ -44,5 +50,3 @@ def download_image(sender, instance, **kwargs):
     if instance.pk and not instance.file:
         t = URLOpener().open(instance.download_url).read()
         instance.file.save("%d.jpg" % instance.pk, ContentFile(t))
-        
-        
diff --git a/apps/cover/utils.py b/apps/cover/utils.py
index e22fa727..80ee0b7e 100755
--- a/apps/cover/utils.py
+++ b/apps/cover/utils.py
@@ -3,7 +3,10 @@
 # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
+import json
+import re
 from urllib import FancyURLopener
+
 from django.contrib.sites.models import Site
 
 
@@ -11,3 +14,59 @@ class URLOpener(FancyURLopener):
     @property
     def version(self):
         return 'FNP Redakcja (http://%s)' % Site.objects.get_current()
+
+
+class FlickrError(Exception):
+    """Raised when a Flickr photo page cannot be fetched or parsed."""
+
+
+def get_flickr_data(url):
+    """Scrape the metadata of a Flickr photo page.
+
+    Returns a dict with the keys source_url (the normalized page URL),
+    license_url, license_name, author, title and download_url.
+    Raises FlickrError if the URL does not match a Flickr photo URL, the
+    page cannot be read, or one of these fields cannot be extracted.
+    """
+    m = re.match(r'(https?://)?(www\.|secure\.)?flickr\.com/photos/(?P<author>[^/]+)/(?P<img>\d+)/?', url)
+    if not m:
+        raise FlickrError("It doesn't look like Flickr URL.")
+    author_slug, img_id = m.group('author'), m.group('img')
+    base_url = "https://www.flickr.com/photos/%s/%s/" % (author_slug, img_id)
+    try:
+        html = URLOpener().open(url).read().decode('utf-8')
+    except (IOError, UnicodeDecodeError):
+        # A page that is not valid UTF-8 is as unusable as an unreachable one.
+        raise FlickrError('Error reading page')
+    match = re.search(r'<a href="([^"]*)"[^>]* rel="license ', html)
+    if not match:
+        raise FlickrError('License not found.')
+    license_url = match.group(1)
+    m = re.match(r'https?://creativecommons.org/licenses/([^/]*)/([^/]*)/.*', license_url)
+    if not m:
+        raise FlickrError('License does not look like CC: %s' % license_url)
+    license_name = 'CC %s %s' % (m.group(1).upper(), m.group(2))
+    m = re.search(r'<a[^>]* class="owner-name [^>]*>([^<]*)<', html)
+    if not m:
+        raise FlickrError('Error reading author name.')
+    author = "%s@Flickr" % m.group(1)
+    m = re.search(r'<h1[^>]*>(.*?)</h1>', html, re.S)
+    if not m:
+        raise FlickrError('Error reading image title.')
+    title = m.group(1).strip()
+    m = re.search(r'modelExport: (\{.*\})', html)
+    if not m:
+        # Explicit check instead of `assert`, which is stripped under -O.
+        raise FlickrError('Error reading image URL.')
+    try:
+        download_url = 'https:' + json.loads(m.group(1))['photo-models'][0]['sizes']['o']['url']
+    except (ValueError, IndexError, KeyError, TypeError):
+        raise FlickrError('Error reading image URL.')
+    return {
+        'source_url': base_url,
+        'license_url': license_url,
+        'license_name': license_name,
+        'author': author,
+        'title': title,
+        'download_url': download_url,
+    }
diff --git a/apps/cover/views.py b/apps/cover/views.py
index 4a6e575b..607d8088 100644
--- a/apps/cover/views.py
+++ b/apps/cover/views.py
@@ -15,7 +15,6 @@ from catalogue.models import Chunk
 from cover.models import Image
 from cover import forms
 
-
 PREVIEW_SIZE = (216, 300)
 
 
@@ -107,8 +106,6 @@ def image(request, pk):
 
 @active_tab('cover')
 def image_list(request):
-    objects = Image.objects.all()
-    enable_add = request.user.has_perm('cover.add_image')
     return render(request, "cover/image_list.html", {
         'object_list': Image.objects.all(),
         'can_add': request.user.has_perm('cover.add_image'),
-- 
2.20.1