Fix Picture import: split theme names, more space for titles.
[wolnelektury.git] / apps / picture / models.py
index 8aa6f97..cc29f85 100644 (file)
@@ -1,23 +1,70 @@
-from django.db import models
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+from django.db import models, transaction
 import catalogue.models
 from django.db.models import permalink
 from sorl.thumbnail import ImageField
 from django.conf import settings
+from django.contrib.contenttypes.fields import GenericRelation
 from django.core.files.storage import FileSystemStorage
 from django.utils.datastructures import SortedDict
-from django.template.loader import render_to_string
-from django.core.cache import cache
-from catalogue.utils import split_tags
-from django.utils.safestring import mark_safe
-from librarian import dcparser
-from slughifi import slughifi
+from fnpdjango.utils.text.slughifi import slughifi
+from ssify import flush_ssi_includes
+from picture import tasks
+from StringIO import StringIO
+import jsonfield
+import itertools
+import logging
+
+from PIL import Image
 
 from django.utils.translation import ugettext_lazy as _
 from newtagging import managers
 from os import path
 
 
-picture_storage = FileSystemStorage(location=path.join(settings.MEDIA_ROOT, 'pictures'), base_url=settings.MEDIA_URL + "pictures/")
+picture_storage = FileSystemStorage(location=path.join(
+        settings.MEDIA_ROOT, 'pictures'),
+        base_url=settings.MEDIA_URL + "pictures/")
+
+
+class PictureArea(models.Model):
+    picture = models.ForeignKey('picture.Picture', related_name='areas')
+    area = jsonfield.JSONField(_('area'), default={}, editable=False)
+    kind = models.CharField(_('kind'), max_length=10, blank=False,
+                           null=False, db_index=True,
+                           choices=(('thing', _('thing')),
+                                    ('theme', _('theme'))))
+
+    objects     = models.Manager()
+    tagged      = managers.ModelTaggedItemManager(catalogue.models.Tag)
+    tags        = managers.TagDescriptor(catalogue.models.Tag)
+    tag_relations = GenericRelation(catalogue.models.Tag.intermediary_table_model)
+
+    short_html_url_name = 'picture_area_short'
+
+    @classmethod
+    def rectangle(cls, picture, kind, coords):
+        pa = PictureArea()
+        pa.picture = picture
+        pa.kind = kind
+        pa.area = coords
+        return pa
+
+    def flush_includes(self, languages=True):
+        if not languages:
+            return
+        if languages is True:
+            languages = [lc for (lc, _ln) in settings.LANGUAGES]
+        flush_ssi_includes([
+            template % (self.pk, lang)
+            for template in [
+                '/katalog/pa/%d/short.%s.html',
+                ]
+            for lang in languages
+            ])
 
 
 class Picture(models.Model):
@@ -25,16 +72,29 @@ class Picture(models.Model):
     Picture resource.
 
     """
-    title       = models.CharField(_('title'), max_length=120)
+    title       = models.CharField(_('title'), max_length=255)
     slug        = models.SlugField(_('slug'), max_length=120, db_index=True, unique=True)
     sort_key    = models.CharField(_('sort key'), max_length=120, db_index=True, editable=False)
+    sort_key_author = models.CharField(_('sort key by author'), max_length=120, db_index=True, editable=False, default=u'')
     created_at  = models.DateTimeField(_('creation date'), auto_now_add=True, db_index=True)
     changed_at  = models.DateTimeField(_('creation date'), auto_now=True, db_index=True)
     xml_file    = models.FileField('xml_file', upload_to="xml", storage=picture_storage)
     image_file  = ImageField(_('image_file'), upload_to="images", storage=picture_storage)
+    html_file   = models.FileField('html_file', upload_to="html", storage=picture_storage)
+    areas_json       = jsonfield.JSONField(_('picture areas JSON'), default={}, editable=False)
+    extra_info    = jsonfield.JSONField(_('extra information'), default={})
+    culturepl_link   = models.CharField(blank=True, max_length=240)
+    wiki_link     = models.CharField(blank=True, max_length=240)
+
+    width       = models.IntegerField(null=True)
+    height      = models.IntegerField(null=True)
+
     objects     = models.Manager()
     tagged      = managers.ModelTaggedItemManager(catalogue.models.Tag)
     tags        = managers.TagDescriptor(catalogue.models.Tag)
+    tag_relations = GenericRelation(catalogue.models.Tag.intermediary_table_model)
+
+    short_html_url_name = 'picture_short'
 
     class AlreadyExists(Exception):
         pass
@@ -45,18 +105,18 @@ class Picture(models.Model):
         verbose_name = _('picture')
         verbose_name_plural = _('pictures')
 
-    URLID_RE = r'[a-z0-9-]+'
-    FILEID_RE = r'[a-z0-9-]+'
-
-    def save(self, force_insert=False, force_update=False, reset_short_html=True, **kwargs):
+    def save(self, force_insert=False, force_update=False, **kwargs):
         from sortify import sortify
 
         self.sort_key = sortify(self.title)
 
-        ret = super(Picture, self).save(force_insert, force_update)
+        try:
+            author = self.tags.filter(category='author')[0].sort_key
+        except IndexError:
+            author = u''
+        self.sort_key_author = author
 
-        if reset_short_html:
-            self.reset_short_html()
+        ret = super(Picture, self).save(force_insert, force_update)
 
         return ret
 
@@ -65,13 +125,10 @@ class Picture(models.Model):
 
     @permalink
     def get_absolute_url(self):
-        return ('picture.views.picture_detail', [self.urlid()])
-
-    def urlid(self):
-        return self.slug
+        return ('picture.views.picture_detail', [self.slug])
 
     @classmethod
-    def from_xml_file(cls, xml_file, image_file=None, overwrite=False):
+    def from_xml_file(cls, xml_file, image_file=None, image_store=None, overwrite=False):
         """
         Import xml and it's accompanying image file.
         If image file is missing, it will be fetched by librarian.picture.ImageStore
@@ -83,10 +140,7 @@ class Picture(models.Model):
         from librarian.picture import WLPicture, ImageStore
         close_xml_file = False
         close_image_file = False
-        # class SimpleImageStore(object):
-        #     def path(self_, slug, mime_type):
-        #         """Returns the image file. Ignores slug ad mime_type."""
-        #         return image_file
+
 
         if image_file is not None and not isinstance(image_file, File):
             image_file = File(open(image_file))
@@ -98,46 +152,133 @@ class Picture(models.Model):
 
         try:
             # use librarian to parse meta-data
-            picture_xml = WLPicture.from_file(xml_file,
-                                              image_store=ImageStore(picture_storage.path('images')))
-                    # image_store=SimpleImageStore
+            if image_store is None:
+                image_store = ImageStore(picture_storage.path('images'))
+            picture_xml = WLPicture.from_file(xml_file, image_store=image_store)
 
             picture, created = Picture.objects.get_or_create(slug=picture_xml.slug)
             if not created and not overwrite:
                 raise Picture.AlreadyExists('Picture %s already exists' % picture_xml.slug)
 
-            picture.title = picture_xml.picture_info.title
+            picture.areas.all().delete()
+            picture.title = unicode(picture_xml.picture_info.title)
+            picture.extra_info = picture_xml.picture_info.to_dict()
 
+            picture_tags = set(catalogue.models.Tag.tags_from_info(picture_xml.picture_info))
             motif_tags = set()
-            for part in picture_xml.partiter():
-                for motif in part['themes']:
-                    tag, created = catalogue.models.Tag.objects.get_or_create(slug=slughifi(motif), category='theme')
-                    if created:
-                        tag.name = motif
-                        tag.sort_key = sortify(tag.name)
-                        tag.save()
-                    motif_tags.add(tag)
+            thing_tags = set()
 
-            picture.tags = catalogue.models.Tag.tags_from_info(picture_xml.picture_info) + \
-                list(motif_tags)
+            area_data = {'themes':{}, 'things':{}}
+
+            for part in picture_xml.partiter():
+                if picture_xml.frame:
+                    c = picture_xml.frame[0]
+                    part['coords'] = [[p[0] - c[0], p[1] - c[1]] for p in part['coords']]
+                if part.get('object', None) is not None:
+                    _tags = set()
+                    for objname in part['object'].split(','):
+                        objname = objname.strip()
+                        tag, created = catalogue.models.Tag.objects.get_or_create(slug=slughifi(objname), category='thing')
+                        if created:
+                            tag.name = objname
+                            tag.sort_key = sortify(tag.name)
+                            tag.save()
+                        #thing_tags.add(tag)
+                        area_data['things'][tag.slug] = {
+                            'object': part['object'],
+                            'coords': part['coords'],
+                            }
+
+                        _tags.add(tag)
+                    area = PictureArea.rectangle(picture, 'thing', part['coords'])
+                    area.save()
+                    area.tags = _tags
+                else:
+                    _tags = set()
+                    for motifs in part['themes']:
+                        for motif in motifs.split(','):
+                            tag, created = catalogue.models.Tag.objects.get_or_create(slug=slughifi(motif), category='theme')
+                            if created:
+                                tag.name = motif
+                                tag.sort_key = sortify(tag.name)
+                                tag.save()
+                            #motif_tags.add(tag)
+                            _tags.add(tag)
+                            area_data['themes'][tag.slug] = {
+                                'theme': motif,
+                                'coords': part['coords']
+                                }
+
+                    logging.debug("coords for theme: %s" % part['coords'])
+                    area = PictureArea.rectangle(picture, 'theme', part['coords'])
+                    area.save()
+                    area.tags = _tags.union(picture_tags)
+
+            picture.tags = picture_tags.union(motif_tags).union(thing_tags)
+            picture.areas_json = area_data
 
             if image_file is not None:
                 img = image_file
             else:
                 img = picture_xml.image_file()
 
-            # FIXME: hardcoded extension
-            picture.image_file.save(path.basename(picture_xml.image_path), File(img))
+            modified = cls.crop_to_frame(picture_xml, img)
+            modified = cls.add_source_note(picture_xml, modified)
+
+            picture.width, picture.height = modified.size
+
+            modified_file = StringIO()
+            modified.save(modified_file, format='png', quality=95)
+            # FIXME: hardcoded extension - detect from DC format or orginal filename
+            picture.image_file.save(path.basename(picture_xml.image_path), File(modified_file))
 
             picture.xml_file.save("%s.xml" % picture.slug, File(xml_file))
             picture.save()
+            tasks.generate_picture_html(picture.id)
+
+        except Exception, ex:
+            logging.exception("Exception during import, rolling back")
+            transaction.rollback()
+            raise ex
+
         finally:
             if close_xml_file:
                 xml_file.close()
             if close_image_file:
                 image_file.close()
+
+        transaction.commit()
+
         return picture
 
+    @classmethod
+    def crop_to_frame(cls, wlpic, image_file):
+        img = Image.open(image_file)
+        if wlpic.frame is None:
+            return img
+        img = img.crop(itertools.chain(*wlpic.frame))
+        return img
+
+    @staticmethod
+    def add_source_note(wlpic, img):
+        from PIL import ImageDraw, ImageFont
+        from librarian import get_resource
+
+        annotated = Image.new(img.mode,
+                (img.size[0], img.size[1] + 40),
+                (255, 255, 255)
+            )
+        annotated.paste(img, (0, 0))
+        annotation = Image.new(img.mode, (3000, 120), (255, 255, 255))
+        ImageDraw.Draw(annotation).text(
+            (30, 15),
+            wlpic.picture_info.source_name,
+            (0, 0, 0),
+            font=ImageFont.truetype(get_resource("fonts/DejaVuSerif.ttf"), 75)
+        )
+        annotated.paste(annotation.resize((1000, 40), Image.ANTIALIAS), (0, img.size[1]))
+        return annotated
+
     @classmethod
     def picture_list(cls, filter=None):
         """Generates a hierarchical listing of all pictures
@@ -155,7 +296,7 @@ class Picture(models.Model):
         for tag in catalogue.models.Tag.objects.filter(category='author'):
             pics_by_author[tag] = []
 
-        for pic in pics:
+        for pic in pics.iterator():
             authors = list(pic.tags.filter(category='author'))
             if authors:
                 for author in authors:
@@ -168,33 +309,37 @@ class Picture(models.Model):
     @property
     def info(self):
         if not hasattr(self, '_info'):
+            from librarian import dcparser
+            from librarian import picture
             info = dcparser.parse(self.xml_file.path, picture.PictureInfo)
             self._info = info
         return self._info
 
-    def reset_short_html(self):
-        if self.id is None:
-            return
+    def pretty_title(self, html_links=False):
+        picture = self
+        names = [(tag.name, tag.get_absolute_url())
+                 for tag in self.tags.filter(category='author')]
+        names.append((self.title, self.get_absolute_url()))
 
-        cache_key = "Picture.short_html/%d" % (self.id)
-        cache.delete(cache_key)
-
-    def short_html(self):
-        if self.id:
-            cache_key = "Picture.short_html/%d" % (self.id)
-            short_html = cache.get(cache_key)
-        else:
-            short_html = None
-
-        if short_html is not None:
-            return mark_safe(short_html)
+        if html_links:
+            names = ['<a href="%s">%s</a>' % (tag[1], tag[0]) for tag in names]
         else:
-            tags = self.tags.filter(category__in=('author', 'kind', 'epoch'))
-            tags = split_tags(tags)
+            names = [tag[0] for tag in names]
+        return ', '.join(names)
 
-            short_html = unicode(render_to_string('picture/picture_short.html',
-                {'picture': self, 'tags': tags}))
+    def related_themes(self):
+        return catalogue.models.Tag.objects.usage_for_queryset(
+            self.areas.all(), counts=True).filter(category__in=('theme', 'thing'))
 
-            if self.id:
-                cache.set(cache_key, short_html, catalogue.models.CACHE_FOREVER)
-            return mark_safe(short_html)
+    def flush_includes(self, languages=True):
+        if not languages:
+            return
+        if languages is True:
+            languages = [lc for (lc, _ln) in settings.LANGUAGES]
+        flush_ssi_includes([
+            template % (self.pk, lang)
+            for template in [
+                '/katalog/p/%d/short.%s.html',
+                ]
+            for lang in languages
+            ])