X-Git-Url: https://git.mdrn.pl/redakcja.git/blobdiff_plain/44f4112a8796f5d2c555bed63f34b9ce98e5fe65..db8146ee85026b477da41a4e02a0833ee35ee628:/apps/cover/forms.py?ds=sidebyside diff --git a/apps/cover/forms.py b/apps/cover/forms.py index f49f4f54..e4c949c8 100755 --- a/apps/cover/forms.py +++ b/apps/cover/forms.py @@ -3,6 +3,7 @@ # This file is part of FNP-Redakcja, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # +import json import re from urllib2 import urlopen from django import forms @@ -62,6 +63,7 @@ class FlickrForm(forms.Form): def clean_source_url(self): def normalize_html(html): + return html return re.sub('[\t\n]', '', html) url = self.cleaned_data['source_url'] @@ -75,34 +77,33 @@ class FlickrForm(forms.Form): html = normalize_html(urlopen(url).read().decode('utf-8')) except: raise forms.ValidationError('Error reading page.') - match = re.search(r'Some rights reserved', html) + match = re.search(r']* rel="license ', html) try: assert match license_url = match.group(1) self.cleaned_data['license_url'] = license_url - re_license = re.compile(r'http://creativecommons.org/licenses/([^/]*)/([^/]*)/.*') + re_license = re.compile(r'https?://creativecommons.org/licenses/([^/]*)/([^/]*)/.*') m = re_license.match(license_url) assert m self.cleaned_data['license_name'] = 'CC %s %s' % (m.group(1).upper(), m.group(2)) except AssertionError: raise forms.ValidationError('Error reading license name.') - m = re.search(r'"ownername":"([^"]*)', html) + m = re.search(r']* class="owner-name [^>]*>([^<]*)<', html) if m: self.cleaned_data['author'] = "%s@Flickr" % m.group(1) else: raise forms.ValidationError('Error reading author name.') - m = re.search(r']*>(.*?)', html) + m = re.search(r']*>(.*?)', html, re.S) if not m: raise forms.ValidationError('Error reading image title.') - self.cleaned_data['title'] = m.group(1) + self.cleaned_data['title'] = m.group(1).strip() - url_size = base_url + "sizes/o/" - html = normalize_html(urlopen(url_size).read().decode('utf-8')) - m = re.search(r'
\s*