validate value counts in metadata + fix multiple values
[redakcja.git] / apps / catalogue / models / document.py
1 # -*- coding: utf-8 -*-
2 #
3 # This file is part of MIL/PEER, licensed under GNU Affero GPLv3 or later.
4 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 #
6 from __future__ import unicode_literals
7
8 from datetime import date
9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
11 from django.db import models
12 from django.template.loader import render_to_string
13 from django.utils.encoding import force_unicode
14 from django.utils.translation import ugettext_lazy as _
15 from dvcs.models import Ref
16 from organizations.models import Organization
17 from catalogue.constants import STAGES
18 from .tag import Tag, Category
19
20
def metadata_from_text(text):
    """Extract catalogue metadata from a document's XML source.

    The returned dict always contains ``'title'``. When the XML cannot be
    parsed, the dict is exactly ``{'title': '<<Resource invalid>>'}``.
    Otherwise it may additionally contain:

    * ``'cover_url'`` -- text of the Dublin Core
      ``relation.coverimage.url`` element, if that element is present;
    * one key per ``Category.dc_tag`` found in the metadata section: a list
      of strings when ``category.multiple`` is true, a single string
      otherwise;
    * ``'multiple_values'`` -- the ``dc_tag`` of a single-valued category
      that occurred more than once (the last occurrence wins, and only the
      last offending tag is recorded).

    :param text: document source as a unicode string.
    :return: dict of extracted metadata.
    """
    from lxml import etree

    dc_ns = '{http://purl.org/dc/elements/1.1/}'
    sst_ns = '{http://nowoczesnapolska.org.pl/sst#}'

    metadata = {}
    # Strip byte-order marks wherever they occur in the text.
    text = text.replace(u'\ufeff', '')
    # This is bad. The editor shouldn't spew unknown HTML entities.
    # &nbsp; is not one of XML's predefined entities, so substitute the
    # actual no-break space character before handing the text to the parser.
    text = text.replace(u'&nbsp;', u'\u00a0')

    try:
        t = etree.fromstring(text)
    except Exception:
        # Deliberately best-effort: any parse failure yields a placeholder
        # title. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return {'title': '<<Resource invalid>>'}

    # Look for an un-namespaced header first, then the SST-namespaced one.
    # The already parsed tree is reused instead of parsing the text again.
    header = t.find('.//header')
    if header is None:
        header = t.find('.//' + sst_ns + 'header')
    # A missing header or an empty header text both fall back to one space.
    metadata['title'] = getattr(header, 'text', ' ') or ' '

    # The metadata section is looked up as a direct child of the root only.
    m = t.find('metadata')
    if m is None:
        m = t.find(sst_ns + 'metadata')
    if m is None:
        return metadata

    c = m.find(dc_ns + 'relation.coverimage.url')
    if c is not None:
        metadata['cover_url'] = c.text

    for category in Category.objects.all():
        dc_tag = category.dc_tag
        for elem in m.findall(dc_ns + dc_tag):
            if elem.text is None:
                # Elements without text carry no value; skip them.
                continue
            if category.multiple:
                metadata.setdefault(dc_tag, []).append(elem.text)
            else:
                if dc_tag in metadata:
                    # Flag the duplicate so callers can report it.
                    metadata['multiple_values'] = dc_tag
                metadata[dc_tag] = elem.text
    return metadata
57
58
class Document(Ref):
    """An editable chunk of text.

    Extends ``Ref`` with ownership (a user or an organization), a workflow
    stage with an assignee, tags and soft-delete / publication flags.

    NOTE(review): ``materialize()`` and ``commit()`` are presumably provided
    by ``dvcs.models.Ref``, and ``plan_set`` by a reverse relation declared
    elsewhere -- neither is visible in this module; confirm.
    """

    # Both owner fields are nullable; can_edit() prefers the user owner.
    owner_user = models.ForeignKey(settings.AUTH_USER_MODEL, null=True)
    owner_organization = models.ForeignKey(Organization, null=True)
    # Current workflow stage; defaults to the first entry of STAGES.
    stage = models.CharField(_('stage'), max_length=128, blank=True, default=STAGES[0][0])
    # Set by set_stage() from the plan matching the current stage.
    assigned_to = models.ForeignKey(settings.AUTH_USER_MODEL, null=True, related_name='assignments')
    deleted = models.BooleanField(default=False)
    tags = models.ManyToManyField(Tag, blank=True)
    # we need to know if it was ever published (for notifications)
    published = models.BooleanField(default=False)

    # Where to cache searchable stuff from metadata?
    # Probably in some kind of search index.

    class Meta:
        verbose_name = _('document')
        verbose_name_plural = _('documents')

    def short_html(self):
        """Render the book-list template with this document as ``book``."""
        return render_to_string('catalogue/book_list/book.html', {'book': self})

    def meta(self):
        """Return the metadata dict parsed from the materialized text."""
        return metadata_from_text(self.materialize())

    def can_edit(self, user):
        """Tell whether ``user`` may edit this document.

        Superusers always may. Otherwise the user must be the owning user
        or, when there is no owning user, a member of the owning
        organization. A document with no owner at all is editable by
        superusers only.
        """
        if user.is_superuser:
            return True
        if self.owner_user:
            return self.owner_user == user
        if self.owner_organization is None:
            # Both owner fields are nullable; without this guard the call
            # below would raise AttributeError on None.
            return False
        return self.owner_organization.is_member(user)

    def set_stage(self, stage):
        """Move the document to ``stage``, reassign it and save.

        The assignee becomes the user of the plan for the new stage, or
        ``None`` when there is no unambiguous plan for it.
        """
        self.stage = stage
        plan = self.get_plan()
        self.assigned_to = plan.user if plan is not None else None
        self.save()

    def stage_name(self):
        """Return the display name of the current stage, or ``None`` if unset."""
        return force_unicode(dict(STAGES)[self.stage]) if self.stage else None

    def get_plan(self):
        """Return the plan for the current stage.

        Returns ``None`` when no plan, or more than one plan, matches.
        """
        try:
            return self.plan_set.get(stage=self.stage)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            return None

    def is_overdue(self):
        """Return ``True`` when the current plan's deadline is before today."""
        plan = self.get_plan()
        # bool() keeps the result a real boolean rather than leaking None or
        # the deadline value out of the short-circuit expression.
        return bool(plan is not None and plan.deadline and plan.deadline < date.today())

    def commit(self, *args, **kwargs):
        """Commit via the parent class, then re-sync tags from the metadata.

        For every category, the tags whose ``dc_value`` matches a value
        found in the freshly parsed metadata are assigned to this document
        through ``category.set_tags_for``.
        """
        super(Document, self).commit(*args, **kwargs)
        m = self.meta()
        for category in Category.objects.all():
            values = m.get(category.dc_tag)
            if values is None:
                # No value in the metadata: match no tags. Checking this
                # first avoids wrapping the missing value as [None].
                values = []
            elif not category.multiple:
                # Single-valued categories store a bare string.
                values = [values]
            tags = category.tag_set.filter(dc_value__in=values)
            category.set_tags_for(self, tags)