add a filter
[wolnelektury.git] / src / lesmianator / models.py
1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
3 #
4 from functools import reduce
5 import pickle
6 from pickle import PickleError
7 from datetime import datetime
8 from random import randint
9
10 from django.core.files.base import ContentFile
11 from django.db import models
12 from django.utils.timezone import utc
13 from django.contrib.auth.models import User
14 from django.contrib.contenttypes.models import ContentType
15 from django.contrib.contenttypes.fields import GenericForeignKey
16 from django.conf import settings
17 from django.urls import reverse
18
19 from catalogue.models import Book, Tag
20
21
class Poem(models.Model):
    """A generated poem, with its provenance and view statistics.

    The class also hosts the text generator itself: ``write`` builds text
    letter by letter from a continuation table, i.e. a mapping from a short
    preceding string to a ``{next_letter: count}`` dictionary.
    """

    slug = models.SlugField('slug', max_length=120, db_index=True)
    # The poem's text.
    text = models.TextField('tekst')
    created_by = models.ForeignKey(User, models.SET_NULL, null=True)
    # Free-form additional information about how the poem was created.
    created_from = models.TextField('dodatkowe informacje', null=True, blank=True)
    # Creation date; set automatically when the row is first saved.
    created_at = models.DateTimeField('data utworzenia', auto_now_add=True, editable=False)
    # Date of the most recent viewing; refreshed by ``visit``.
    seen_at = models.DateTimeField('data ostatniego obejrzenia', auto_now_add=True, editable=False)
    # View counter; incremented by ``visit``.
    view_count = models.IntegerField('licznik obejrzeń', default=1)

    # Default continuation table, loaded once while the class body executes.
    # Any failure to obtain it (e.g. the setting is not defined, the file
    # cannot be opened, or the pickle is broken or truncated) falls back to
    # an empty table, which makes ``write`` return an empty string.
    # NOTE: unpickling can execute arbitrary code, so the file named by
    # settings.LESMIANATOR_PICKLE must come from a trusted source.
    try:
        # The context manager closes the file even when unpickling fails.
        with open(settings.LESMIANATOR_PICKLE, 'rb') as f:
            global_dictionary = pickle.load(f)
    except (IOError, AttributeError, PickleError, EOFError):
        # EOFError is what pickle raises for an empty/truncated file; it is
        # not a PickleError subclass, so it has to be listed explicitly.
        global_dictionary = {}

    def visit(self):
        """Register one more view: bump the counter, update ``seen_at``, save."""
        self.view_count += 1
        # Create the aware UTC timestamp directly instead of building a naive
        # one with the deprecated ``utcnow()`` and patching tzinfo afterwards.
        self.seen_at = datetime.now(tz=utc)
        self.save()

    def __str__(self):
        return "%s (%s...)" % (self.slug, self.text[:20])

    @staticmethod
    def choose_letter(word, continuations):
        """Pick the letter following ``word``, at random, weighted by count.

        :param word: the preceding string used as the lookup key.
        :param continuations: mapping ``word -> {letter: count}``.
        :return: the chosen letter, or ``'\\n'`` when ``word`` has no
            recorded continuations.
        """
        options = continuations.get(word)
        if not options:
            # Unknown (or empty) context: end the current line. An empty
            # table would otherwise crash in ``randint(0, -1)``.
            return '\n'

        total = sum(options.values())
        r = randint(0, total - 1)

        # Walk the table subtracting weights; the letter whose weight takes
        # ``r`` below zero is the one selected.
        for letter, weight in options.items():
            r -= weight
            if r < 0:
                return letter

    @classmethod
    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
        """Generate text from a continuation table.

        :param continuations: mapping ``word -> {letter: count}``; when
            ``None``, the class-level ``global_dictionary`` is used.
        :param length: maximum number of trailing characters kept as the
            lookup context; expected to be at least 1.
        :param min_lines: number of non-empty verses, counted in completed
            stanzas, after which generation stops.
        :param maxlen: upper bound on the number of generated characters.
        :return: the generated text with surrounding whitespace stripped,
            or ``''`` when the table is empty.
        """
        if continuations is None:
            continuations = cls.global_dictionary
        if not continuations:
            return ''

        letters = []
        word = ''

        finished_stanza_verses = 0
        current_stanza_verses = 0
        verse_start = True

        # do `min_lines' non-empty verses and then stop,
        # but let Lesmianator finish his last stanza.
        while finished_stanza_verses < min_lines and len(letters) < maxlen:
            letter = cls.choose_letter(word, continuations)
            letters.append(letter)
            # Keep only the last `length` characters as context. Slicing
            # after concatenation avoids the `word[-length + 1:]` pitfall,
            # where length == 1 yields `word[0:]` and keeps everything.
            word = (word + letter)[-length:]

            if letter == '\n':
                if verse_start:
                    # A newline right after a newline closes the stanza.
                    finished_stanza_verses += current_stanza_verses
                    current_stanza_verses = 0
                else:
                    # A newline after some text closes a non-empty verse.
                    current_stanza_verses += 1
                    verse_start = True
            else:
                verse_start = False

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the ``get_poem`` view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
98
99
class Continuations(models.Model):
    """Continuation table cached on disk for one object (a Book or a Tag).

    A continuation table maps a short preceding string to a dictionary
    ``{next_letter: count}``. The table is stored in a pickle file together
    with the set of keys it was built for, so ``get`` can detect when the
    cached copy no longer matches.
    """

    # Continuations file: a pickled ``(keys, table)`` pair, see ``get``.
    pickle = models.FileField('plik kontynuacji', upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType, models.CASCADE)
    object_id = models.PositiveIntegerField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (('content_type', 'object_id'), )

    def __str__(self):
        return "Continuations for: %s" % str(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add all counts from table ``b`` into table ``a``.

        ``a`` is modified in place and also returned, which makes the
        function usable as a ``reduce`` step.
        """
        for pre, posts in b.items():
            merged = a.setdefault(pre, {})
            for post, count in posts.items():
                merged[post] = merged.get(post, 0) + count
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Build the continuation table for ``book`` and its children.

        :param book: object providing ``wldocument()`` and ``children``.
        :param length: maximum number of trailing characters used as the
            lookup context; expected to be at least 1.
        :return: mapping ``context -> {letter: count}``.
        """
        # count from this book only
        wldoc = book.wldocument(parse_dublincore=False)
        output = wldoc.as_text(('raw-text',)).get_bytes()
        del wldoc

        conts = {}
        last_word = ''
        for letter in output.decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict[letter] = mydict.get(letter, 0) + 1
            # Keep only the last `length` characters as context. Slicing
            # after concatenation avoids the `[-length+1:]` pitfall, where
            # length == 1 yields `[0:]` and keeps the whole string.
            last_word = (last_word + letter)[-length:]
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all().iterator()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Build the merged continuation table of the top-level books for ``tag``.

        Returns an empty table when no book matches.
        """
        books = Book.tagged_top_level([tag])
        cont_tabs = (cls.get(b) for b in books.iterator())
        # The explicit initial value keeps reduce() from raising TypeError
        # when there are no books at all.
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the continuation table for ``sth``, using the cached pickle.

        The cached table is used only when a record with a pickle file
        exists and the key set stored in that file equals the expected one
        (the object's own id, or for a Tag the pks of the books returned by
        ``Book.tagged.with_any``). Otherwise the table is rebuilt, stored
        and returned.

        :raises NotImplementedError: when the table has to be rebuilt and
            ``sth`` is neither a Book nor a Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        should_keys = {sth.id}
        if isinstance(sth, Tag):
            should_keys = {b.pk for b in Book.tagged.with_any((sth,)).iterator()}
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            if not obj.pickle:
                # A record without a file is treated like a missing record.
                raise cls.DoesNotExist
            # The context manager closes the file even when unpickling fails.
            with open(obj.pickle.path, 'rb') as f:
                keys, conts = pickle.load(f)
            if set(keys) != should_keys:
                # The cache was built for a different key set: rebuild it.
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
            c.pickle.save(sth.slug+'.p', ContentFile(pickle.dumps((should_keys, conts))))
            c.save()
            return conts