lesmianator fix
[wolnelektury.git] / apps / lesmianator / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import cPickle
6 from datetime import datetime
7 from random import randint
8 from StringIO import StringIO
9
10 from django.core.files.base import ContentFile
11 from django.db import models
12 from django.db.models import permalink
13 from django.utils.translation import ugettext_lazy as _
14 from django.core.urlresolvers import reverse
15 from django.db.models.signals import m2m_changed
16 from django.contrib.auth.models import User
17 from django.contrib.contenttypes.models import ContentType
18 from django.contrib.contenttypes import generic
19 from django.conf import settings
20
21 from librarian import text
22 from catalogue.fields import JSONField
23 from catalogue.models import Book, Tag
24
25
class Poem(models.Model):
    """A generated poem stored together with its view statistics.

    Besides the database fields, the class carries ``global_dictionary``:
    a continuation table loaded once, when the class body is executed,
    from the pickle file named by ``settings.LESMIANATOR_PICKLE``.
    """

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    text = models.TextField(_('text'))
    created_by = models.ForeignKey(User, null=True)
    created_from = JSONField(_('extra information'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
    view_count = models.IntegerField(_('view count'), default=1)

    # Loading the table is best effort: any failure (missing setting,
    # missing or unreadable file, broken pickle) leaves the table empty,
    # in which case write() returns an empty string.
    # NOTE(review): unpickling executes arbitrary code, so the configured
    # file must come from a trusted source.
    try:
        _pickle_file = open(settings.LESMIANATOR_PICKLE)
        try:
            global_dictionary = cPickle.load(_pickle_file)
        finally:
            # Close the handle even when unpickling fails and keep the
            # temporary name out of the class namespace.
            _pickle_file.close()
            del _pickle_file
    except Exception:
        global_dictionary = {}

    def visit(self):
        """Count one more view, stamp the view time and save the poem."""
        self.view_count += 1
        self.seen_at = datetime.now()
        self.save()

    def __unicode__(self):
        return "%s (%s...)" % (self.slug, self.text[:20])

    @classmethod
    def write(cls, continuations=None, length=3, maxlen=1000):
        """Generate a random text by walking a continuation table.

        Arguments:
        continuations -- mapping of a context string to a dict of
            ``{next character: count}``; ``cls.global_dictionary`` is used
            when it is None.
        length -- the context passed to the table is built from the last
            ``length - 1`` characters of the previous context plus the
            newly drawn character.
        maxlen -- upper bound on the number of draws.

        Returns the drawn characters joined together and stripped; an
        empty string when the table is empty.
        """
        def choose_word(word, continuations):
            """Draw a follower of `word`, weighted by its count.

            Returns '' when `word` has no known followers.
            """
            try:
                followers = continuations[word]
            except KeyError:
                return ''
            if not followers:
                # An empty follower dict would make randint(0, -1) raise
                # ValueError; treat it like an unknown context instead.
                return ''

            r = randint(0, sum(followers.values()) - 1)
            for post in followers:
                r -= followers[post]
                if r < 0:
                    return post

        if continuations is None:
            continuations = cls.global_dictionary

        letters = []
        word = u''
        empty = -10
        lines = 0
        if not continuations:
            maxlen = 0
        # want at least two lines, but let Lesmianator end his stanzas
        while (empty < 2 or lines < 2) and maxlen:
            letter = choose_word(word, continuations)
            letters.append(letter)
            # Keep the last `length - 1` characters of the context.  The
            # start index is computed explicitly because `word[-length+1:]`
            # turns into `word[0:]` for length == 1 and would keep the
            # whole, ever-growing word.
            word = word[max(0, len(word) - (length - 1)):] + letter
            if letter == u'\n':
                # count non-empty lines
                if empty == 0:
                    lines += 1
                # once two lines are counted, start counting the
                # consecutive newlines that end the stanza
                if lines >= 2:
                    empty += 1
                # NOTE(review): together with the increment above, a
                # newline ending a non-empty line adds 2 to `lines`;
                # confirm this is intended.
                lines += 1
            else:
                empty = 0
            maxlen -= 1

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the 'get_poem' view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
95
96
class Continuations(models.Model):
    """Cached continuation table for a single Book or Tag.

    A table maps a context string to a dict of ``{next character: count}``.
    It is stored as a pickle file holding the tuple ``(keys, table)``,
    where ``keys`` are the ids the table was computed for; get() uses
    them to detect a stale cache entry.
    """

    pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = generic.GenericForeignKey('content_type', 'object_id')

    def __unicode__(self):
        return "Continuations for: %s" % unicode(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add every count from table `b` to table `a`.

        `a` is modified in place and returned, which makes the function
        usable as the reducing function of reduce().
        """
        for pre in b:
            a.setdefault(pre, {})
            for post in b[pre]:
                a[pre].setdefault(post, 0)
                a[pre][post] += b[pre][post]
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Compute the continuation table for `book` and its children.

        The book's XML file is transformed to raw text, lower-cased and
        walked character by character; each character is counted as a
        follower of the up to `length` characters preceding it.  The
        tables of all child books, obtained through get(), are merged in.
        """
        # count from this book only
        output = StringIO()
        f = open(book.xml_file.path)
        try:
            text.transform(f, output, False, ('raw-text',))
        finally:
            # do not leak the handle when the transformation fails
            f.close()
        conts = {}
        last_word = ''
        for letter in output.getvalue().decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict.setdefault(letter, 0)
            mydict[letter] += 1
            # `letter` is always one character here, so this keeps the
            # last `length - 1` characters of context plus the new one.
            # Unlike `last_word[-length+1:]` it also truncates correctly
            # for length == 1.
            last_word = (last_word + letter)[-length:]
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Compute the merged continuation table of the books tagged `tag`.

        Returns an empty table when no book is selected.
        """
        # book contains its descendants, we don't want them twice
        books = Book.tagged.with_any((tag,))
        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
        if descendants_keys:
            books = books.exclude(pk__in=descendants_keys)

        cont_tabs = (cls.get(b) for b in books)
        # The explicit initial value keeps reduce() from raising TypeError
        # when there are no books to merge.
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the continuation table for `sth`, computing it if needed.

        `sth` must be a Book or a Tag.  A cached table is reused only when
        the keys stored with it still match the expected ones (the
        object's own id for a Book, the ids of all tagged books for a
        Tag); otherwise the stale entry is deleted and the table is
        rebuilt and cached again.

        Raises NotImplementedError when there is no usable cached table
        and `sth` is neither a Book nor a Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        should_keys = set([sth.id])
        if isinstance(sth, Tag):
            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)))
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            # The file was saved from pickled bytes (see below), so read
            # it back in binary mode.
            f = open(obj.pickle.path, 'rb')
            try:
                keys, conts = cPickle.load(f)
            finally:
                f.close()
            if set(keys) != should_keys:
                # stale cache: drop it and fall through to a rebuild
                obj.delete()
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                # NotImplemented is a constant, not an exception class;
                # calling it would raise TypeError instead.
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c = cls(content_object=sth)
            c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
            c.save()
            return conts
174
175