Minor fix in OAI-PMH.
[wolnelektury.git] / apps / lesmianator / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import cPickle
6 from datetime import datetime
7 from random import randint
8 from StringIO import StringIO
9
10 from django.core.files.base import ContentFile
11 from django.db import models
12 from django.utils.timezone import utc
13 from django.utils.translation import ugettext_lazy as _
14 from django.core.urlresolvers import reverse
15 from django.contrib.auth.models import User
16 from django.contrib.contenttypes.models import ContentType
17 from django.contrib.contenttypes.fields import GenericForeignKey
18 from django.conf import settings
19
20 from jsonfield import JSONField
21 from catalogue.models import Book, Tag
22
23
class Poem(models.Model):
    """Stored poem text together with its origin and view statistics.

    The class also carries ``global_dictionary``, a continuation table
    unpickled from ``settings.LESMIANATOR_PICKLE`` when the class body is
    executed. ``write`` falls back to it when no table is passed in.
    """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    text = models.TextField(_('text'))
    created_by = models.ForeignKey(User, null=True)
    created_from = JSONField(_('extra information'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
    view_count = models.IntegerField(_('view count'), default=1)

    # Best-effort load of the default continuation table: any failure
    # (missing setting, missing file, broken pickle) leaves an empty table,
    # which makes ``write`` return an empty string.
    # NOTE(review): unpickling runs code embedded in the file; this assumes
    # LESMIANATOR_PICKLE points at a trusted, server-generated file.
    try:
        # The context manager closes the handle even when unpickling fails.
        with open(settings.LESMIANATOR_PICKLE) as f:
            global_dictionary = cPickle.load(f)
    except Exception:
        global_dictionary = {}

    def visit(self):
        """Record a view: bump the counter, stamp the time (UTC) and save."""
        self.view_count += 1
        self.seen_at = datetime.utcnow().replace(tzinfo=utc)
        self.save()

    def __unicode__(self):
        return "%s (%s...)" % (self.slug, self.text[:20])

    @staticmethod
    def choose_letter(word, continuations):
        """Pick the next letter after ``word`` at random, weighted by count.

        ``continuations`` maps a context string to a mapping of
        ``letter -> count``. A newline is returned when the context is
        unknown or has no usable followers.
        """
        if word not in continuations:
            return u'\n'

        followers = continuations[word]
        total = sum(followers[letter] for letter in followers)
        if total <= 0:
            # An empty follower table would make randint(0, -1) raise
            # ValueError; treat it like an unknown context instead.
            return u'\n'

        r = randint(0, total - 1)
        # Walk the cumulative counts until the drawn number is used up.
        for letter in followers:
            r -= followers[letter]
            if r < 0:
                return letter

        # Only reachable with inconsistent (e.g. negative) counts; end the
        # line rather than returning None to the caller.
        return u'\n'

    @classmethod
    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
        """Generate text letter by letter from a continuation table.

        :param continuations: mapping of context -> {letter: count};
            defaults to ``cls.global_dictionary``.
        :param length: the context passed to ``choose_letter`` keeps the
            last ``length - 1`` letters written so far.
        :param min_lines: number of non-empty verses, counted in finished
            stanzas, after which generation stops.
        :param maxlen: hard limit on the number of generated characters.
        :returns: the generated text with surrounding whitespace stripped,
            or ``''`` when the table is empty.
        """
        if continuations is None:
            continuations = cls.global_dictionary
        if not continuations:
            return ''

        letters = []
        word = u''

        finished_stanza_verses = 0
        current_stanza_verses = 0
        verse_start = True

        char_count = 0

        # do `min_lines' non-empty verses and then stop,
        # but let Lesmianator finish his last stanza.
        while finished_stanza_verses < min_lines and char_count < maxlen:
            letter = cls.choose_letter(word, continuations)
            letters.append(letter)
            # NOTE(review): for length == 1 the slice is word[-0:], i.e. the
            # whole word, so the context grows instead of staying empty.
            # Continuations.for_book builds its keys with the same
            # expression, so the two have to be changed together.
            word = word[-length + 1:] + letter
            char_count += 1

            if letter == u'\n':
                if verse_start:
                    # Newline on an empty line: the stanza is finished, so
                    # its verses now count towards `min_lines'.
                    finished_stanza_verses += current_stanza_verses
                    current_stanza_verses = 0
                else:
                    # Newline closing a non-empty verse.
                    current_stanza_verses += 1
                    verse_start = True
            else:
                verse_start = False

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the ``get_poem`` view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
100
101
class Continuations(models.Model):
    """Continuation table cached on disk for one object (a Book or a Tag).

    The pickle file stores a ``(keys, conts)`` pair: ``conts`` maps a
    context string to ``{letter: count}`` and ``keys`` is the set of ids
    the table was built for, used by ``get`` to detect a stale cache.
    """
    pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (('content_type', 'object_id'), )

    def __unicode__(self):
        return "Continuations for: %s" % unicode(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add every count from table ``b`` into table ``a``; return ``a``.

        ``a`` is modified in place, ``b`` is only read.
        """
        for pre in b:
            merged = a.setdefault(pre, {})
            for post in b[pre]:
                merged[post] = merged.get(post, 0) + b[pre][post]
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Build the continuation table for ``book`` and all its children.

        The book's own raw text is lower-cased and scanned letter by letter;
        the context of each letter is the preceding ``length - 1`` letters.
        Tables of child books are obtained through ``get`` and merged in.
        """
        # count from this book only
        wldoc = book.wldocument(parse_dublincore=False)
        output = wldoc.as_text(('raw-text',)).get_string()
        del wldoc

        conts = {}
        last_word = ''
        for letter in output.decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict[letter] = mydict.get(letter, 0) + 1
            # NOTE(review): for length == 1 the slice is last_word[-0:],
            # i.e. the whole string, so contexts grow without bound.
            # Poem.write advances its context with the same expression, so
            # the two have to be changed together.
            last_word = last_word[-length+1:] + letter
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all().iterator()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Merge the tables of all top-level books tagged with ``tag``.

        Returns an empty table when the tag has no books.
        """
        books = Book.tagged_top_level([tag])
        cont_tabs = (cls.get(b) for b in books.iterator())
        # Start from a fresh dict: reduce() without an initial value raises
        # TypeError on an empty sequence and would otherwise fold everything
        # into the first book's table in place.
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the continuation table for ``sth``, using the disk cache.

        The cached table is used only when its stored key set matches the
        expected one (the object's own id, or for a Tag the ids of the books
        carrying it). Otherwise, or when the cache entry or its file is
        missing or unreadable, the table is rebuilt, pickled and returned.

        :raises NotImplementedError: on a cache miss when ``sth`` is neither
            a Book nor a Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        if isinstance(sth, Tag):
            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator())
        else:
            should_keys = set([sth.id])
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            if not obj.pickle:
                raise cls.DoesNotExist
            # NOTE(review): unpickling runs code embedded in the file; this
            # assumes the storage directory holds only files written by the
            # branch below.
            try:
                # Binary mode matches how the file is written (raw bytes via
                # ContentFile); the context manager closes the handle even
                # when loading fails.
                with open(obj.pickle.path, 'rb') as f:
                    keys, conts = cPickle.load(f)
            except (IOError, EOFError, cPickle.UnpicklingError):
                # A missing or corrupt cache file is just a cache miss.
                raise cls.DoesNotExist
            if set(keys) != should_keys:
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
            c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
            c.save()
            return conts
177
178