Str/bytes fix for publishing audiobooks.
[wolnelektury.git] / src / lesmianator / models.py
1 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
3 #
4 from functools import reduce
5 import pickle
6 from pickle import PickleError
7 from datetime import datetime
8 from random import randint
9
10 from django.core.files.base import ContentFile
11 from django.db import models
12 from django.utils.timezone import utc
13 from django.utils.translation import ugettext_lazy as _
14 from django.core.urlresolvers import reverse
15 from django.contrib.auth.models import User
16 from django.contrib.contenttypes.models import ContentType
17 from django.contrib.contenttypes.fields import GenericForeignKey
18 from django.conf import settings
19
20 from jsonfield import JSONField
21 from catalogue.models import Book, Tag
22
23
class Poem(models.Model):
    """A stored poem with its view statistics, plus the text generator.

    The generator is a character-level chain: a "continuations" table maps
    a short prefix of already written characters to a dict of
    ``{next_character: count}``, and the next character is drawn at random
    with probability proportional to its count.
    """

    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    text = models.TextField(_('text'))
    created_by = models.ForeignKey(User, null=True)
    created_from = JSONField(_('extra information'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
    view_count = models.IntegerField(_('view count'), default=1)

    # Default continuations table, loaded once when the class body runs.
    # Loading is best-effort: a missing setting (AttributeError), an
    # unreadable file (IOError) or a corrupt/truncated pickle (PickleError,
    # EOFError) all leave the table empty, in which case write() returns ''.
    # NOTE(security): pickle.load executes arbitrary code from the file;
    # LESMIANATOR_PICKLE must only ever point at a trusted, locally
    # generated file.
    try:
        with open(settings.LESMIANATOR_PICKLE, 'rb') as f:
            global_dictionary = pickle.load(f)
    except (IOError, AttributeError, EOFError, PickleError):
        global_dictionary = {}

    def visit(self):
        """Record one view: bump the counter, refresh `seen_at`, and save."""
        self.view_count += 1
        self.seen_at = datetime.now(utc)
        self.save()

    def __str__(self):
        return "%s (%s...)" % (self.slug, self.text[:20])

    @staticmethod
    def choose_letter(word, continuations):
        """Draw the character that follows the prefix `word`.

        `continuations[word]` is expected to map characters to occurrence
        counts; a character is returned with probability proportional to
        its count. A newline is returned when the prefix is unknown or its
        table carries no positive weight, so the caller simply ends the
        current verse instead of failing.
        """
        if word not in continuations:
            return u'\n'

        counts = continuations[word]
        total = sum(counts.values())
        if total <= 0:
            # randint(0, -1) would raise ValueError on an empty table.
            return u'\n'

        r = randint(0, total - 1)
        # Walk the cumulative distribution until the drawn number is used up.
        for letter, count in counts.items():
            r -= count
            if r < 0:
                return letter

        # Only reachable with inconsistent (e.g. negative) counts.
        return u'\n'

    @classmethod
    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
        """Generate a poem and return it as a stripped string.

        Args:
            continuations: table of ``{prefix: {character: count}}``;
                defaults to `cls.global_dictionary`.
            length: number of trailing characters used as the lookup
                prefix; should match the value the table was built with.
            min_lines: number of non-empty verses in finished stanzas after
                which generation stops.
            maxlen: hard limit on the number of generated characters.

        Returns:
            The generated text, or '' when the table is empty.
        """
        if continuations is None:
            continuations = cls.global_dictionary
        if not continuations:
            return ''

        letters = []
        word = u''

        finished_stanza_verses = 0
        current_stanza_verses = 0
        verse_start = True

        char_count = 0

        # do `min_lines' non-empty verses and then stop,
        # but let Lesmianator finish his last stanza.
        while finished_stanza_verses < min_lines and char_count < maxlen:
            letter = cls.choose_letter(word, continuations)
            letters.append(letter)
            # Keep only the last `length` characters as the next prefix.
            # Slicing the concatenation (rather than `word[-length + 1:]`)
            # also works for length == 1, where the old slice became
            # `word[0:]` and let the prefix grow without bound.
            word = (word + letter)[-length:] if length > 0 else u''
            char_count += 1

            if letter == u'\n':
                if verse_start:
                    # Empty line: the stanza is over, count its verses.
                    finished_stanza_verses += current_stanza_verses
                    current_stanza_verses = 0
                else:
                    # End of a non-empty verse.
                    current_stanza_verses += 1
                    verse_start = True
            else:
                verse_start = False

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the 'get_poem' view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
100
101
class Continuations(models.Model):
    """Continuations table cached on disk for a Book or a Tag.

    The table maps a prefix to ``{next_character: count}``. It is stored as
    a pickled ``(keys, table)`` tuple, where `keys` are the ids the table
    was computed for; `get` uses them to detect a stale cache.
    """

    # The field name shadows the `pickle` module inside the class body only;
    # methods below still resolve `pickle` to the module.
    pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (('content_type', 'object_id'), )

    def __str__(self):
        return "Continuations for: %s" % str(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add the counts of table `b` into table `a` in place; return `a`."""
        for pre in b:
            a.setdefault(pre, {})
            for post in b[pre]:
                a[pre].setdefault(post, 0)
                a[pre][post] += b[pre][post]
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Build the table for `book`, including all of its children.

        The book's own 'raw-text' output is decoded as UTF-8, stripped and
        lower-cased, then every character is counted under the prefix made
        of the (up to) `length` characters preceding it. Tables of child
        books are obtained through `get` and merged in.
        """
        # count from this book only
        wldoc = book.wldocument(parse_dublincore=False)
        output = wldoc.as_text(('raw-text',)).get_bytes()
        del wldoc

        conts = {}
        last_word = ''
        for letter in output.decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict.setdefault(letter, 0)
            mydict[letter] += 1
            # Keep the last `length` characters as the next prefix. Slicing
            # the concatenation also handles length == 1, where the old
            # `last_word[-length+1:]` was `last_word[0:]` and never trimmed.
            last_word = (last_word + letter)[-length:] if length > 0 else ''
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all().iterator()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Build the table for `tag` by merging the tables of its books.

        Books come from ``Book.tagged_top_level([tag])``. Merging starts
        from a fresh dict, so an empty book set yields ``{}`` instead of a
        TypeError from `reduce`, and no per-book table is modified.
        """
        books = Book.tagged_top_level([tag])
        cont_tabs = (cls.get(b) for b in books.iterator())
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the table for `sth`, using the on-disk cache when valid.

        The cache is valid when a row with a pickle file exists for `sth`
        and the keys stored in that file equal the expected ones: the id of
        `sth` itself, or for a Tag the ids of the books returned by
        ``Book.tagged.with_any((sth,))``. Otherwise the table is recomputed
        and the cache file is rewritten.

        Raises:
            NotImplementedError: when the table has to be computed and
                `sth` is neither a Book nor a Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        should_keys = {sth.id}
        if isinstance(sth, Tag):
            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator())
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            if not obj.pickle:
                raise cls.DoesNotExist
            # NOTE(security): pickle.load executes arbitrary code from the
            # file; these files must only ever be written by this method.
            with open(obj.pickle.path, 'rb') as f:
                keys, conts = pickle.load(f)
            if set(keys) != should_keys:
                # Cached for a different set of books: treat as missing.
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
            c.pickle.save(sth.slug+'.p', ContentFile(pickle.dumps((should_keys, conts))))
            c.save()
            return conts