disable crawling for catalogue pages with multiple tags
[wolnelektury.git] / src / lesmianator / models.py
1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
4 #
5 import cPickle
6 from cPickle import PickleError
7 from datetime import datetime
8 from random import randint
9 from StringIO import StringIO
10
11 from django.core.files.base import ContentFile
12 from django.db import models
13 from django.utils.timezone import utc
14 from django.utils.translation import ugettext_lazy as _
15 from django.core.urlresolvers import reverse
16 from django.contrib.auth.models import User
17 from django.contrib.contenttypes.models import ContentType
18 from django.contrib.contenttypes.fields import GenericForeignKey
19 from django.conf import settings
20
21 from jsonfield import JSONField
22 from catalogue.models import Book, Tag
23
24
class Poem(models.Model):
    """A generated poem together with its view statistics.

    Besides the stored fields, the class holds `global_dictionary`: a
    continuations table loaded once, when the class body is executed, from
    the file named by `settings.LESMIANATOR_PICKLE`.  The table maps a
    prefix (the most recently written characters) to a
    `{next_letter: count}` dictionary and is the default input of `write`.
    """
    slug = models.SlugField(_('slug'), max_length=120, db_index=True)
    text = models.TextField(_('text'))
    created_by = models.ForeignKey(User, null=True)
    created_from = JSONField(_('extra information'), null=True, blank=True)
    created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
    seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
    view_count = models.IntegerField(_('view count'), default=1)

    # Best-effort load of the global continuations table.  A missing setting
    # (AttributeError), an unreadable file (IOError) or a damaged/truncated
    # pickle (PickleError, EOFError) all fall back to an empty table, which
    # makes `write` return an empty string instead of breaking the import.
    # The file is opened in binary mode and closed by the `with` block even
    # when unpickling fails.
    # NOTE: pickle is only safe for trusted files; the path comes from
    # the project settings.
    try:
        with open(settings.LESMIANATOR_PICKLE, 'rb') as f:
            global_dictionary = cPickle.load(f)
    except (IOError, AttributeError, PickleError, EOFError):
        global_dictionary = {}

    def visit(self):
        """Record one more view: bump `view_count`, refresh `seen_at`, save."""
        # NOTE(review): the counter is incremented on the in-memory instance
        # and then saved, so concurrent visits may overwrite each other.
        self.view_count += 1
        self.seen_at = datetime.utcnow().replace(tzinfo=utc)
        self.save()

    def __unicode__(self):
        """Return the slug followed by the first 20 characters of the text."""
        return "%s (%s...)" % (self.slug, self.text[:20])

    @staticmethod
    def choose_letter(word, continuations):
        """Draw the letter that follows `word`, weighted by its count.

        `continuations` maps a prefix to a `{letter: count}` dictionary.
        A newline is returned when `word` is not a known prefix or when its
        table carries no weight at all (empty, or all counts zero), so the
        caller always receives a string.
        """
        if word not in continuations:
            return u'\n'

        weights = continuations[word]
        total = sum(weights.values())
        if total <= 0:
            # randint(0, -1) would raise ValueError; treat a weightless
            # table the same way as an unknown prefix.
            return u'\n'

        # Pick a point in [0, total) and walk the letters until the running
        # sum of counts passes it.
        r = randint(0, total - 1)
        for letter, weight in weights.items():
            r -= weight
            if r < 0:
                return letter

    @classmethod
    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
        """Generate a poem letter by letter and return it as a string.

        continuations -- prefix table to draw from; defaults to
                         `cls.global_dictionary`.  An empty table yields ''.
        length        -- maximum number of trailing characters used as the
                         prefix when choosing the next letter.
        min_lines     -- minimum number of non-empty verses to produce.
        maxlen        -- hard limit on the number of generated characters.
        """
        if continuations is None:
            continuations = cls.global_dictionary
        if not continuations:
            return ''

        # Number of trailing characters kept as the prefix.  The previous
        # form, word[-length + 1:] + letter, is the same thing for every
        # length except 1, where the slice became word[0:] and the prefix
        # grew without bound.
        window = max(length, 1)

        letters = []
        word = u''

        finished_stanza_verses = 0
        current_stanza_verses = 0
        verse_start = True

        char_count = 0

        # Produce `min_lines` non-empty verses and then stop, but let
        # Lesmianator finish his last stanza: verses are only counted as
        # finished once an empty line closes the stanza they belong to.
        while finished_stanza_verses < min_lines and char_count < maxlen:
            letter = cls.choose_letter(word, continuations)
            letters.append(letter)
            word = (word + letter)[-window:]
            char_count += 1

            if letter == u'\n':
                if verse_start:
                    # Newline right after a newline: the stanza is over.
                    finished_stanza_verses += current_stanza_verses
                    current_stanza_verses = 0
                else:
                    # Newline after some text: one more verse in the stanza.
                    current_stanza_verses += 1
                    verse_start = True
            else:
                verse_start = False

        return ''.join(letters).strip()

    def get_absolute_url(self):
        """Return the URL of the `get_poem` view for this poem's slug."""
        return reverse('get_poem', kwargs={'poem': self.slug})
101
102
class Continuations(models.Model):
    """Cached continuations table for a single Book or Tag.

    The `pickle` file stores a `(keys, conts)` tuple: `conts` maps a
    prefix to a `{next_letter: count}` dictionary and `keys` is the set of
    object ids the table was computed for, used by `get` to detect a stale
    cache.
    """
    pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
    content_type = models.ForeignKey(ContentType)
    object_id = models.PositiveIntegerField()
    content_object = GenericForeignKey('content_type', 'object_id')

    class Meta:
        unique_together = (('content_type', 'object_id'), )

    def __unicode__(self):
        """Return a label naming the object this table belongs to."""
        return "Continuations for: %s" % unicode(self.content_object)

    @staticmethod
    def join_conts(a, b):
        """Add every count from table `b` into table `a` and return `a`.

        `a` is modified in place; `b` is left untouched.
        """
        for pre, posts in b.items():
            merged = a.setdefault(pre, {})
            for post, count in posts.items():
                merged[post] = merged.get(post, 0) + count
        return a

    @classmethod
    def for_book(cls, book, length=3):
        """Compute the continuations table of `book` and all its children.

        The book's own raw text is lower-cased and scanned character by
        character, counting which letter follows each prefix of at most
        `length` characters.  The tables of the child books (obtained
        through `get`) are then added in.
        """
        # count from this book only
        wldoc = book.wldocument(parse_dublincore=False)
        output = wldoc.as_text(('raw-text',)).get_string()
        del wldoc

        # Number of trailing characters kept as the prefix.  The previous
        # form, last_word[-length+1:] + letter, is the same thing for every
        # length except 1, where the prefix grew without bound.
        window = max(length, 1)

        conts = {}
        last_word = ''
        for letter in output.decode('utf-8').strip().lower():
            mydict = conts.setdefault(last_word, {})
            mydict[letter] = mydict.get(letter, 0) + 1
            last_word = (last_word + letter)[-window:]
        # add children
        return reduce(cls.join_conts,
                      (cls.get(child) for child in book.children.all().iterator()),
                      conts)

    @classmethod
    def for_set(cls, tag):
        """Compute the combined table of all top-level books tagged `tag`.

        Returns an empty table when no book carries the tag.
        """
        books = Book.tagged_top_level([tag])
        cont_tabs = (cls.get(b) for b in books.iterator())
        # The explicit initial value keeps reduce() from raising TypeError
        # on an empty sequence.
        return reduce(cls.join_conts, cont_tabs, {})

    @classmethod
    def get(cls, sth):
        """Return the continuations table for `sth`, a Book or a Tag.

        A stored table is reused when its pickle file can be read and the
        ids saved with it still match the expected ones (the object's own id
        for a Book, the ids of all books carrying the tag for a Tag).
        Otherwise the table is recomputed, stored and returned.

        Raises NotImplementedError when a table has to be computed for an
        object that is neither a Book nor a Tag.
        """
        object_type = ContentType.objects.get_for_model(sth)
        should_keys = {sth.id}
        if isinstance(sth, Tag):
            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator())
        try:
            obj = cls.objects.get(content_type=object_type, object_id=sth.id)
            if not obj.pickle:
                raise cls.DoesNotExist
            # NOTE: pickle is only safe for trusted files; this one is
            # written by the code below.
            try:
                with open(obj.pickle.path, 'rb') as f:
                    keys, conts = cPickle.load(f)
            except (IOError, EOFError, PickleError):
                # A missing or damaged cache file is just a cache miss:
                # fall through to regeneration instead of failing.
                raise cls.DoesNotExist
            if set(keys) != should_keys:
                # Computed for a different set of books: stale.
                raise cls.DoesNotExist
            return conts
        except cls.DoesNotExist:
            if isinstance(sth, Book):
                conts = cls.for_book(sth)
            elif isinstance(sth, Tag):
                conts = cls.for_set(sth)
            else:
                raise NotImplementedError('Lesmianator continuations: only Book and Tag supported')

            c, _created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
            c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
            c.save()
            return conts