- m = re.match(r'(https?://)?(www\.|secure\.)?flickr\.com/photos/(?P<author>[^/]+)/(?P<img>\d+)/?', url)
- if not m:
- raise forms.ValidationError("It doesn't look like Flickr URL.")
- author_slug, img_id = m.group('author'), m.group('img')
- base_url = "https://www.flickr.com/photos/%s/%s/" % (author_slug, img_id)
-
- try:
- html = normalize_html(urlopen(url).read().decode('utf-8'))
- except:
- raise forms.ValidationError('Error reading page.')
- match = re.search(r'<a href="([^"]*)"[^>]* rel="license ', html)
- try:
- assert match
- license_url = match.group(1)
- self.cleaned_data['license_url'] = license_url
- re_license = re.compile(r'https?://creativecommons.org/licenses/([^/]*)/([^/]*)/.*')
- m = re_license.match(license_url)
- assert m
- self.cleaned_data['license_name'] = 'CC %s %s' % (m.group(1).upper(), m.group(2))
- except AssertionError:
- raise forms.ValidationError('Error reading license name.')
-
- m = re.search(r'<a[^>]* class="owner-name [^>]*>([^<]*)<', html)
- if m:
- self.cleaned_data['author'] = "%s@Flickr" % m.group(1)
- else:
- raise forms.ValidationError('Error reading author name.')
-
- m = re.search(r'<h1[^>]*>(.*?)</h1>', html, re.S)
- if not m:
- raise forms.ValidationError('Error reading image title.')
- self.cleaned_data['title'] = m.group(1).strip()
-
- m = re.search(r'modelExport: (\{.*\})', html)