chunk edit fix
[wolnelektury.git] / src / lesmianator / models.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from functools import reduce
5 import pickle
6 from pickle import PickleError
7 from datetime import datetime
8 from random import randint
9
10 from django.core.files.base import ContentFile
11 from django.db import models
12 from django.utils.timezone import utc
13 from django.utils.translation import ugettext_lazy as _
14 from django.contrib.auth.models import User
15 from django.contrib.contenttypes.models import ContentType
16 from django.contrib.contenttypes.fields import GenericForeignKey
17 from django.conf import settings
18 from django.urls import reverse
19
20 from catalogue.models import Book, Tag
21
22
class Poem(models.Model):
    """A generated poem together with its authorship and view statistics.

    Besides the stored fields, the class carries ``global_dictionary``: a
    continuations table loaded once, when the class body is executed, from
    the pickle file named by ``settings.LESMIANATOR_PICKLE``.  The table maps
    a context string to a ``{next_letter: count}`` dict and is the default
    source for :meth:`write`.
    """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    text = models.TextField(_('text'))
    created_by = models.ForeignKey(User, models.SET_NULL, null=True)
    created_from = models.TextField(_('extra information'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
    view_count = models.IntegerField(_('view count'), default=1)

    # Best-effort load of the shared continuations table.  A missing setting
    # (AttributeError), an unreadable file (IOError) or a damaged/truncated
    # pickle (PickleError, EOFError) all fall back to an empty table, which
    # makes `write()` return an empty string instead of breaking the import.
    # NOTE(review): pickle must only ever be loaded from trusted files;
    # this path comes from project settings.
    try:
        # `with` guarantees the handle is closed even if unpickling fails.
        with open(settings.LESMIANATOR_PICKLE, 'rb') as f:
            global_dictionary = pickle.load(f)
    except (IOError, AttributeError, EOFError, PickleError):
        global_dictionary = {}

    def visit(self):
        """Record one more view: bump the counter, refresh `seen_at`, save."""
        self.view_count += 1
        self.seen_at = datetime.utcnow().replace(tzinfo=utc)
        self.save()

    def __str__(self):
        return "%s (%s...)" % (self.slug, self.text[:20])

    @staticmethod
    def choose_letter(word, continuations):
        """Pick the next letter after context `word`, weighted by its counts.

        `continuations` maps a context string to a ``{letter: count}`` dict.
        Returns a newline when the context is unknown or when its follower
        table carries no positive weight (empty dict or all-zero counts), so
        the caller always receives a string.
        """
        if word not in continuations:
            return '\n'

        followers = continuations[word]
        total = sum(followers.values())
        if total <= 0:
            # randint(0, -1) would raise ValueError; treat as a dead end.
            return '\n'

        # Weighted draw: walk the counts until the random offset is used up.
        r = randint(0, total - 1)
        for letter, count in followers.items():
            r -= count
            if r < 0:
                return letter

        # Only reachable with inconsistent (e.g. negative) counts.
        return '\n'

    @classmethod
    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
        """Generate poem text letter by letter from a continuations table.

        continuations -- table to draw from; defaults to `global_dictionary`.
        length        -- number of trailing characters kept as lookup context.
        min_lines     -- stop once this many non-empty verses belong to
                         finished stanzas.
        maxlen        -- hard cap on the number of generated characters.

        Returns the generated text with surrounding whitespace stripped, or
        an empty string when the table is empty.
        """
        if continuations is None:
            continuations = cls.global_dictionary
        if not continuations:
            return ''

        # The original slice `word[-length + 1:]` degenerates to `word[0:]`
        # for length == 1, letting the context grow without bound.  Trimming
        # the combined string gives the same result for every other value
        # and a proper one-character context for length == 1.
        context_len = max(length, 1)

        letters = []
        word = ''

        finished_stanza_verses = 0
        current_stanza_verses = 0
        verse_start = True

        char_count = 0

        # do `min_lines' non-empty verses and then stop,
        # but let Lesmianator finish his last stanza.
        while finished_stanza_verses < min_lines and char_count < maxlen:
            letter = cls.choose_letter(word, continuations)
            letters.append(letter)
            word = (word + letter)[-context_len:]
            char_count += 1

            if letter == '\n':
                if verse_start:
                    # Empty line: the stanza is over, bank its verses.
                    finished_stanza_verses += current_stanza_verses
                    current_stanza_verses = 0
                else:
                    # End of a non-empty verse.
                    current_stanza_verses += 1
                    verse_start = True
            else:
                verse_start = False

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the `get_poem` view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
99
100
class Continuations(models.Model):
    """On-disk cache of a letter-continuations table for one Book or Tag.

    The pickle file stores a ``(keys, conts)`` pair: `keys` is the set of
    book ids the table was built for and `conts` maps a context string to a
    ``{next_letter: count}`` dict.
    """
    # NOTE: this field shadows the `pickle` module only inside the class
    # body; the methods below still resolve `pickle` to the module.
    pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType, models.CASCADE)
    object_id = models.PositiveIntegerField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (('content_type', 'object_id'), )

    def __str__(self):
        return "Continuations for: %s" % str(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add all counts from table `b` into table `a` in place; return `a`."""
        for pre, followers in b.items():
            merged = a.setdefault(pre, {})
            for post, count in followers.items():
                merged[post] = merged.get(post, 0) + count
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Build the continuations table for `book` and all of its children.

        `length` is the number of trailing characters used as the context
        key.  The book's own lower-cased raw text is counted first, then the
        tables of its children (obtained through `get`) are merged in.
        """
        # count from this book only
        wldoc = book.wldocument(parse_dublincore=False)
        output = wldoc.as_text(('raw-text',)).get_bytes()
        del wldoc

        # The original slice `last_word[-length+1:]` degenerates to
        # `last_word[0:]` for length == 1, so every key became the entire
        # text prefix.  Trimming the combined string is identical for all
        # other values and yields one-character contexts for length == 1.
        context_len = max(length, 1)

        conts = {}
        last_word = ''
        for letter in output.decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict[letter] = mydict.get(letter, 0) + 1
            last_word = (last_word + letter)[-context_len:]
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all().iterator()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Merge the tables of every top-level book tagged with `tag`.

        Returns an empty table when the tag has no such books.
        """
        books = Book.tagged_top_level([tag])
        cont_tabs = (cls.get(b) for b in books.iterator())
        # Explicit initial value: reduce() over an empty iterable without
        # one raises TypeError.
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the continuations table for a Book or Tag, using the cache.

        The cached pickle is used only if it exists, can be read, and was
        built for exactly the expected set of book ids; otherwise the table
        is rebuilt and stored again.

        Raises NotImplementedError for objects that are neither Book nor Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        should_keys = {sth.id}
        if isinstance(sth, Tag):
            should_keys = {b.pk for b in Book.tagged.with_any((sth,)).iterator()}
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            if not obj.pickle:
                raise cls.DoesNotExist
            try:
                # `with` closes the handle even when unpickling fails.
                with open(obj.pickle.path, 'rb') as f:
                    keys, conts = pickle.load(f)
            except (OSError, EOFError, PickleError):
                # A missing or damaged cache file is a cache miss, not a
                # fatal error: fall through to regeneration below.
                raise cls.DoesNotExist
            if set(keys) != should_keys:
                # Cache was built for a different set of books: stale.
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
            c.pickle.save(sth.slug+'.p', ContentFile(pickle.dumps((should_keys, conts))))
            c.save()
            return conts