disable crawling for catalogue pages with multiple tags
[wolnelektury.git] / src / picture / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 from django.db import models, transaction
6 import catalogue.models
7 from django.db.models import permalink
8 from sorl.thumbnail import ImageField
9 from django.conf import settings
10 from django.contrib.contenttypes.fields import GenericRelation
11 from django.core.files.storage import FileSystemStorage
12 from django.utils.datastructures import SortedDict
13 from fnpdjango.utils.text.slughifi import slughifi
14 from ssify import flush_ssi_includes
15
16 from catalogue.models.tag import prefetched_relations
17 from picture import tasks
18 from StringIO import StringIO
19 import jsonfield
20 import itertools
21 import logging
22 import re
23
24 from PIL import Image
25
26 from django.utils.translation import ugettext_lazy as _
27 from newtagging import managers
28 from os import path
29
30
# Storage for all picture files: kept under MEDIA_ROOT/pictures and
# served from MEDIA_URL + "pictures/".
picture_storage = FileSystemStorage(
    location=path.join(settings.MEDIA_ROOT, 'pictures'),
    base_url=settings.MEDIA_URL + "pictures/",
)
34
35
class PictureArea(models.Model):
    """A taggable region of a picture, classified as a 'thing' or a 'theme'."""

    picture = models.ForeignKey('picture.Picture', related_name='areas')
    area = jsonfield.JSONField(_('area'), default={}, editable=False)
    kind = models.CharField(
        _('kind'),
        max_length=10,
        blank=False,
        null=False,
        db_index=True,
        choices=(
            ('thing', _('thing')),
            ('theme', _('theme')),
        ),
    )

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(catalogue.models.Tag)
    tags = managers.TagDescriptor(catalogue.models.Tag)
    tag_relations = GenericRelation(catalogue.models.Tag.intermediary_table_model)

    short_html_url_name = 'picture_area_short'

    @classmethod
    def rectangle(cls, picture, kind, coords):
        """Build (without saving) an area of `kind` on `picture` covering `coords`."""
        new_area = PictureArea()
        new_area.area = coords
        new_area.kind = kind
        new_area.picture = picture
        return new_area

    def flush_includes(self, languages=True):
        """
        Flush the SSI includes rendered for this area.

        `languages` is an iterable of language codes; True means every
        language from settings.LANGUAGES, a falsy value means do nothing.
        """
        if languages is True:
            languages = [code for (code, _name) in settings.LANGUAGES]
        elif not languages:
            return

        include_paths = []
        for template in ('/katalog/pa/%d/short.%s.html',):
            for lang in languages:
                include_paths.append(template % (self.pk, lang))
        flush_ssi_includes(include_paths)
70
71
class Picture(models.Model):
    """
    Picture resource.

    Stores the picture's title and slug, its XML, image and HTML files
    (all kept in `picture_storage`), the image dimensions and the tagged
    areas described in the XML.
    """
    title = models.CharField(_('title'), max_length=32767)
    slug = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
    # Both sort keys are recomputed on every save(): sort_key from the title,
    # sort_key_author from the first author tag.
    sort_key = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
    sort_key_author = models.CharField(
        _('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
    # NOTE(review): this label repeats 'creation date' although the field is
    # auto_now (last modification) -- looks like a copy-paste slip; confirm
    # before changing, as it affects translations and migrations.
    changed_at = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
    xml_file = models.FileField(_('xml file'), upload_to="xml", storage=picture_storage)
    image_file = ImageField(_('image file'), upload_to="images", storage=picture_storage)
    html_file = models.FileField(_('html file'), upload_to="html", storage=picture_storage)
    # Area coordinates keyed by tag slug, under 'themes' and 'things'
    # (filled in by from_xml_file).
    areas_json = jsonfield.JSONField(_('picture areas JSON'), default={}, editable=False)
    # Metadata parsed from the picture XML (filled in by from_xml_file).
    extra_info = jsonfield.JSONField(_('extra information'), default={})
    culturepl_link = models.CharField(blank=True, max_length=240)
    wiki_link = models.CharField(blank=True, max_length=240)

    # Size of the stored image (set by from_xml_file).
    width = models.IntegerField(null=True)
    height = models.IntegerField(null=True)

    objects = models.Manager()
    tagged = managers.ModelTaggedItemManager(catalogue.models.Tag)
    tags = managers.TagDescriptor(catalogue.models.Tag)
    tag_relations = GenericRelation(catalogue.models.Tag.intermediary_table_model)

    short_html_url_name = 'picture_short'

    class AlreadyExists(Exception):
        # Raised by from_xml_file when the slug is taken and overwrite is off.
        pass

    class Meta:
        # Default ordering: by author sort key first, then by title sort key.
        ordering = ('sort_key_author', 'sort_key')

        verbose_name = _('picture')
        verbose_name_plural = _('pictures')
110
111     def save(self, force_insert=False, force_update=False, **kwargs):
112         from sortify import sortify
113
114         self.sort_key = sortify(self.title)[:120]
115
116         try:
117             author = self.authors().first().sort_key
118         except AttributeError:
119             author = u''
120         self.sort_key_author = author
121
122         ret = super(Picture, self).save(force_insert, force_update)
123
124         return ret
125
    def __unicode__(self):
        """Represent the picture by its title."""
        return self.title
128
    def authors(self):
        """Return this picture's tags of category 'author'."""
        return self.tags.filter(category='author')
131
132     def tag_unicode(self, category):
133         relations = prefetched_relations(self, category)
134         if relations:
135             return ', '.join(rel.tag.name for rel in relations)
136         else:
137             return ', '.join(self.tags.filter(category=category).values_list('name', flat=True))
138
    def author_unicode(self):
        """Return the names of this picture's author tags, comma-separated."""
        return self.tag_unicode('author')
141
    @permalink
    def get_absolute_url(self):
        """Name the picture detail view and its argument (the slug); the
        `permalink` decorator is what turns this pair into a URL."""
        return 'picture.views.picture_detail', [self.slug]
145
146     def get_initial(self):
147         try:
148             return re.search(r'\w', self.title, re.U).group(0)
149         except AttributeError:
150             return ''
151
152     def get_next(self):
153         try:
154             return type(self).objects.filter(sort_key__gt=self.sort_key)[0]
155         except IndexError:
156             return None
157
158     def get_previous(self):
159         try:
160             return type(self).objects.filter(sort_key__lt=self.sort_key).order_by('-sort_key')[0]
161         except IndexError:
162             return None
163
164     @classmethod
165     def from_xml_file(cls, xml_file, image_file=None, image_store=None, overwrite=False):
166         """
167         Import xml and it's accompanying image file.
168         If image file is missing, it will be fetched by librarian.picture.ImageStore
169         which looks for an image file in the same directory the xml is, with extension matching
170         its mime type.
171         """
172         from sortify import sortify
173         from django.core.files import File
174         from librarian.picture import WLPicture, ImageStore
175         close_xml_file = False
176         close_image_file = False
177
178         if image_file is not None and not isinstance(image_file, File):
179             image_file = File(open(image_file))
180             close_image_file = True
181
182         if not isinstance(xml_file, File):
183             xml_file = File(open(xml_file))
184             close_xml_file = True
185
186         with transaction.atomic():
187             # use librarian to parse meta-data
188             if image_store is None:
189                 image_store = ImageStore(picture_storage.path('images'))
190             picture_xml = WLPicture.from_file(xml_file, image_store=image_store)
191
192             picture, created = Picture.objects.get_or_create(slug=picture_xml.slug[:120])
193             if not created and not overwrite:
194                 raise Picture.AlreadyExists('Picture %s already exists' % picture_xml.slug)
195
196             picture.areas.all().delete()
197             picture.title = unicode(picture_xml.picture_info.title)
198             picture.extra_info = picture_xml.picture_info.to_dict()
199
200             picture_tags = set(catalogue.models.Tag.tags_from_info(picture_xml.picture_info))
201             for tag in picture_tags:
202                 if not tag.for_pictures:
203                     tag.for_pictures = True
204                     tag.save()
205
206             area_data = {'themes': {}, 'things': {}}
207
208             # Treat all names in picture XML as in default language.
209             lang = settings.LANGUAGE_CODE
210
211             for part in picture_xml.partiter():
212                 if picture_xml.frame:
213                     c = picture_xml.frame[0]
214                     part['coords'] = [[p[0] - c[0], p[1] - c[1]] for p in part['coords']]
215                 if part.get('object', None) is not None:
216                     _tags = set()
217                     for objname in part['object'].split(','):
218                         objname = objname.strip()
219                         assert objname, 'Empty object name'
220                         # str.capitalize() is wrong, because it also lowers letters
221                         objname = objname[0].upper() + objname[1:]
222                         tag, created = catalogue.models.Tag.objects.get_or_create(
223                             slug=slughifi(objname), category='thing')
224                         if created:
225                             tag.name = objname
226                             setattr(tag, 'name_%s' % lang, tag.name)
227                             tag.sort_key = sortify(tag.name)
228                             tag.for_pictures = True
229                             tag.save()
230                         area_data['things'][tag.slug] = {
231                             'object': objname,
232                             'coords': part['coords'],
233                             }
234
235                         _tags.add(tag)
236                         if not tag.for_pictures:
237                             tag.for_pictures = True
238                             tag.save()
239                     area = PictureArea.rectangle(picture, 'thing', part['coords'])
240                     area.save()
241                     # WTF thing area does not inherit tags from picture and theme area does, is it intentional?
242                     area.tags = _tags
243                 else:
244                     _tags = set()
245                     for motifs in part['themes']:
246                         for motif in motifs.split(','):
247                             tag, created = catalogue.models.Tag.objects.get_or_create(
248                                 slug=slughifi(motif), category='theme')
249                             if created:
250                                 tag.name = motif
251                                 tag.sort_key = sortify(tag.name)
252                                 tag.for_pictures = True
253                                 tag.save()
254                             # motif_tags.add(tag)
255                             _tags.add(tag)
256                             if not tag.for_pictures:
257                                 tag.for_pictures = True
258                                 tag.save()
259                             area_data['themes'][tag.slug] = {
260                                 'theme': motif,
261                                 'coords': part['coords']
262                                 }
263
264                     logging.debug("coords for theme: %s" % part['coords'])
265                     area = PictureArea.rectangle(picture, 'theme', part['coords'])
266                     area.save()
267                     area.tags = _tags.union(picture_tags)
268
269             picture.tags = picture_tags
270             picture.areas_json = area_data
271
272             if image_file is not None:
273                 img = image_file
274             else:
275                 img = picture_xml.image_file()
276
277             modified = cls.crop_to_frame(picture_xml, img)
278             modified = cls.add_source_note(picture_xml, modified)
279
280             picture.width, picture.height = modified.size
281
282             modified_file = StringIO()
283             modified.save(modified_file, format='JPEG', quality=95)
284             # FIXME: hardcoded extension - detect from DC format or orginal filename
285             picture.image_file.save(path.basename(picture_xml.image_path), File(modified_file))
286
287             picture.xml_file.save("%s.xml" % picture.slug, File(xml_file))
288             picture.save()
289             tasks.generate_picture_html(picture.id)
290
291         if close_xml_file:
292             xml_file.close()
293         if close_image_file:
294             image_file.close()
295
296         return picture
297
298     @classmethod
299     def crop_to_frame(cls, wlpic, image_file):
300         img = Image.open(image_file)
301         if wlpic.frame is None or wlpic.frame == [[0, 0], [-1, -1]]:
302             return img
303         img = img.crop(itertools.chain(*wlpic.frame))
304         return img
305
306     @staticmethod
307     def add_source_note(wlpic, img):
308         from PIL import ImageDraw, ImageFont
309         from librarian import get_resource
310
311         annotated = Image.new(img.mode, (img.size[0], img.size[1] + 40), (255, 255, 255))
312         annotated.paste(img, (0, 0))
313         annotation = Image.new('RGB', (img.size[0] * 3, 120), (255, 255, 255))
314         ImageDraw.Draw(annotation).text(
315             (30, 15),
316             wlpic.picture_info.source_name,
317             (0, 0, 0),
318             font=ImageFont.truetype(get_resource("fonts/DejaVuSerif.ttf"), 75)
319         )
320         annotated.paste(annotation.resize((img.size[0], 40), Image.ANTIALIAS), (0, img.size[1]))
321         return annotated
322
323     # WTF/unused
324     @classmethod
325     def picture_list(cls, filter=None):
326         """Generates a hierarchical listing of all pictures
327         Pictures are optionally filtered with a test function.
328         """
329
330         pics = cls.objects.all().order_by('sort_key').only('title', 'slug', 'image_file')
331
332         if filter:
333             pics = pics.filter(filter).distinct()
334
335         pics_by_author = SortedDict()
336         orphans = []
337         for tag in catalogue.models.Tag.objects.filter(category='author'):
338             pics_by_author[tag] = []
339
340         for pic in pics.iterator():
341             authors = list(pic.authors().only('pk'))
342             if authors:
343                 for author in authors:
344                     pics_by_author[author].append(pic)
345             else:
346                 orphans.append(pic)
347
348         return pics_by_author, orphans
349
350     @property
351     def info(self):
352         if not hasattr(self, '_info'):
353             from librarian import dcparser
354             from librarian import picture
355             info = dcparser.parse(self.xml_file.path, picture.PictureInfo)
356             self._info = info
357         return self._info
358
359     def pretty_title(self, html_links=False):
360         names = [(tag.name, tag.get_absolute_url()) for tag in self.authors().only('name', 'category', 'slug')]
361         names.append((self.title, self.get_absolute_url()))
362
363         if html_links:
364             names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
365         else:
366             names = [tag[0] for tag in names]
367         return ', '.join(names)
368
369     def related_themes(self):
370         return catalogue.models.Tag.objects.usage_for_queryset(
371             self.areas.all(), counts=True).filter(category__in=('theme', 'thing'))
372
373     def flush_includes(self, languages=True):
374         if not languages:
375             return
376         if languages is True:
377             languages = [lc for (lc, _ln) in settings.LANGUAGES]
378         flush_ssi_includes([
379             template % (self.pk, lang)
380             for template in [
381                 '/katalog/p/%d/short.%s.html',
382                 '/katalog/p/%d/mini.%s.html',
383                 ]
384             for lang in languages
385             ])