lesmianator continuations cache fixes
[wolnelektury.git] / apps / lesmianator / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import cPickle
6 from datetime import datetime
7 from random import randint
8 from StringIO import StringIO
9
10 from django.core.files.base import ContentFile
11 from django.db import models
12 from django.db.models import permalink
13 from django.utils.translation import ugettext_lazy as _
14 from django.core.urlresolvers import reverse
15 from django.db.models.signals import m2m_changed
16 from django.contrib.auth.models import User
17 from django.contrib.contenttypes.models import ContentType
18 from django.contrib.contenttypes import generic
19 from django.conf import settings
20
21 from librarian import text
22 from catalogue.fields import JSONField
23 from catalogue.models import Book, Tag
24
25
class Poem(models.Model):
    """A generated poem, with its origin and simple view statistics."""

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    text = models.TextField(_('text'))
    created_by = models.ForeignKey(User, null=True)
    created_from = JSONField(_('extra information'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
    view_count = models.IntegerField(_('view count'), default=1)

    # Default continuations table, read once while the class body executes.
    # Loading is best-effort: any ordinary failure (missing setting,
    # unreadable or corrupt file) leaves an empty table so the module still
    # imports. KeyboardInterrupt/SystemExit are deliberately not swallowed.
    try:
        with open(settings.LESMIANATOR_PICKLE) as f:
            global_dictionary = cPickle.load(f)
    except Exception:
        global_dictionary = {}

    def visit(self):
        """Record one more view: bump `view_count`, refresh `seen_at`, save."""
        self.view_count += 1
        self.seen_at = datetime.now()
        self.save()

    def __unicode__(self):
        return "%s (%s...)" % (self.slug, self.text[:20])

    @classmethod
    def write(cls, continuations=None, length=3, maxlen=1000):
        """Generate a text by a weighted random walk over `continuations`.

        `continuations` maps a context string to a mapping
        {next letter: count}; when it is None, `cls.global_dictionary` is
        used. `length` controls how much of the previous context is kept
        between steps, and `maxlen` caps the number of steps.

        Returns the generated letters joined together and stripped. With an
        empty table (or a non-positive `maxlen`) the result is an empty
        string.
        """
        def choose_word(word, continuations):
            """Draw a successor of `word`, weighted by its count.

            Returns '' when `word` is not in the table or has no usable
            successors.
            """
            try:
                successors = continuations[word]
            except KeyError:
                return ''

            choices = sum(successors[post] for post in successors)
            if choices <= 0:
                # randint(0, -1) would raise ValueError; treat a context
                # without weighted successors like an unknown context.
                return ''

            r = randint(0, choices - 1)
            for post in successors:
                r -= successors[post]
                if r < 0:
                    return post
            # Only reachable with inconsistent (e.g. negative) counts; never
            # hand None to the caller, which joins the results as strings.
            return ''

        if continuations is None:
            continuations = cls.global_dictionary

        letters = []
        word = u''
        # `empty` counts consecutive newlines once at least two lines have
        # been counted; it starts well below zero and is reset to 0 by any
        # non-newline step.
        empty = -10
        lines = 0
        if not continuations:
            maxlen = 0
        # want at least two lines, but let Lesmianator end his stanzas
        # (`maxlen > 0` rather than plain truthiness, so a negative or
        # non-integral cap cannot skip past zero and loop on.)
        while (empty < 2 or lines < 2) and maxlen > 0:
            letter = choose_word(word, continuations)
            letters.append(letter)
            # NOTE(review): for length == 1 the slice below is word[0:],
            # which keeps the whole context instead of trimming it; the
            # default length of 3 keeps the last two characters.
            word = word[-length+1:] + letter
            if letter == u'\n':
                # count non-empty lines
                if empty == 0:
                    lines += 1
                # past the second line, newlines count towards the ending
                if lines >= 2:
                    empty += 1
                # NOTE(review): `lines` is incremented here as well as in the
                # `empty == 0` branch above, so a non-empty line counts
                # twice - confirm this is intended before changing it.
                lines += 1
            else:
                empty = 0
            maxlen -= 1

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the 'get_poem' view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
95
96
class Continuations(models.Model):
    """Pickled continuations table cached for a single object.

    The pickle file holds a pair `(keys, conts)`: the set of primary keys the
    table was built for, and the table itself, mapping a context string to
    {next letter: count}.
    """

    pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (('content_type', 'object_id'), )

    def __unicode__(self):
        return "Continuations for: %s" % unicode(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add every count from table `b` into table `a`.

        `a` is modified in place and returned.
        """
        for pre in b:
            a.setdefault(pre, {})
            for post in b[pre]:
                a[pre].setdefault(post, 0)
                a[pre][post] += b[pre][post]
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Build the continuations table for `book` and its children.

        The book's own text is counted letter by letter; the tables of its
        children (obtained through `cls.get`) are then merged in.
        """
        # count from this book only
        output = StringIO()
        # `with` closes the source file even if the transform fails.
        # presumably renders the book XML as raw text - see librarian.text
        with open(book.xml_file.path) as f:
            text.transform(f, output, False, ('raw-text',))
        conts = {}
        last_word = ''
        for letter in output.getvalue().decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict.setdefault(letter, 0)
            mydict[letter] += 1
            # NOTE(review): for length == 1 this slice is last_word[0:], so
            # the context keeps growing instead of being trimmed; the default
            # length of 3 keeps the last two characters.
            last_word = last_word[-length+1:] + letter
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Build the merged continuations table for all books under `tag`.

        Returns an empty table when no books are selected.
        """
        # book contains its descendants, we don't want them twice
        books = Book.tagged.with_any((tag,))
        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
        if descendants_keys:
            books = books.exclude(pk__in=descendants_keys)

        cont_tabs = (cls.get(b) for b in books)
        # Start from a fresh dict: reduce() without an initial value raises
        # TypeError on an empty sequence, and would otherwise merge
        # everything into the first book's table in place.
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the continuations table for a Book or a Tag.

        A stored pickle is used when it exists, can be read, and was built
        for the expected set of keys (the object's own id, or for a Tag the
        pks of the books tagged with it). Otherwise the table is rebuilt,
        stored and returned.

        Raises NotImplementedError when a rebuild is needed for an object
        that is neither a Book nor a Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        should_keys = set([sth.id])
        if isinstance(sth, Tag):
            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)))
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            if not obj.pickle:
                raise cls.DoesNotExist
            try:
                # Binary mode is the right mode for pickle data.
                with open(obj.pickle.path, 'rb') as f:
                    keys, conts = cPickle.load(f)
            except (IOError, EOFError, ValueError, TypeError,
                    cPickle.UnpicklingError):
                # A missing, truncated or malformed cache file is only a
                # cache miss: fall through and rebuild it.
                raise cls.DoesNotExist
            if set(keys) != should_keys:
                # cached for a different set of objects - stale
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                # NotImplemented is a comparison sentinel, not an exception;
                # calling it raises a confusing TypeError.
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
            c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
            c.save()
            return conts
177
178