apps/lesmianator/models.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
   3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
   4 #
   5 import cPickle
   6 from datetime import datetime
   7 from random import randint
   8 from StringIO import StringIO
   9
  10 from django.core.files.base import ContentFile
  11 from django.db import models
  12 from django.db.models import permalink
  13 from django.utils.translation import ugettext_lazy as _
  14 from django.core.urlresolvers import reverse
  15 from django.db.models.signals import m2m_changed
  16 from django.contrib.auth.models import User
  17 from django.contrib.contenttypes.models import ContentType
  18 from django.contrib.contenttypes import generic
  19 from django.conf import settings
  20
  21 from librarian import text
  22 from catalogue.fields import JSONField
  23 from catalogue.models import Book, Tag
  24
  25
  26 class Poem(models.Model):
  27     slug = models.SlugField(_('slug'), max_length=120, db_index=True)
  28     text = models.TextField(_('text'))
  29     created_by = models.ForeignKey(User)
  30     created_from = JSONField(_('extra information'), null=True, blank=True)
  31     created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
  32     seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
  33     view_count = models.IntegerField(_('view count'), default=1)
  34
  35     try:
  36         f = open(settings.LESMIANATOR_PICKLE)
  37         global_dictionary = cPickle.load(f)
  38         f.close()
  39     except:
  40         global_dictionary = {}
  41
  42     def visit(self):
  43         self.view_count += 1
  44         self.seen_at = datetime.now()
  45         self.save()
  46
  47     def __unicode__(self):
  48         return "%s (%s...)" % (self.slug, self.text[:20])
  49
  50     @classmethod
  51     def write(cls, continuations=None, length=3, maxlen=1000):
  52         def choose_word(word, continuations):
  53             try:
  54                 choices = sum((continuations[word][post] for post in continuations[word]))
  55                 r = randint(0, choices - 1)
  56
  57                 for post in continuations[word]:
  58                     r -= continuations[word][post]
  59                     if r < 0:
  60                         return post
  61             except KeyError:
  62                 return ''
  63
  64
  65         if continuations is None:
  66             continuations = cls.global_dictionary
  67
  68         letters = []
  69         word = u''
  70         empty = -10
  71         lines = 0
  72         if not continuations:
  73             maxlen = 0
  74         # want at least two lines, but let Lesmianator end his stanzas
  75         while (empty < 2 or lines < 2) and maxlen:
  76             letter = choose_word(word, continuations)
  77             letters.append(letter)
  78             word = word[-length+1:] + letter
  79             if letter == u'\n':
  80                 # count non-empty lines
  81                 if empty == 0:
  82                     lines += 1
  83                 #
  84                 if lines >= 2:
  85                     empty += 1
  86                 lines += 1
  87             else:
  88                 empty = 0
  89             maxlen -= 1
  90
  91         return ''.join(letters).strip()
  92
  93     def get_absolute_url(self):
  94         return reverse('get_poem', kwargs={'poem': self.slug})
  95
  96
  97 class Continuations(models.Model):
  98     pickle = models.FileField(_('Continuations file'), upload_to='lesmianator')
  99     content_type = models.ForeignKey(ContentType)
 100     object_id = models.PositiveIntegerField()
 101     content_object = generic.GenericForeignKey('content_type', 'object_id')
 102
 103     def __unicode__(self):
 104         return "Continuations for: %s" % unicode(self.content_object)
 105
 106     @staticmethod
 107     def join_conts(a, b):
 108         for pre in b:
 109             a.setdefault(pre, {})
 110             for post in b[pre]:
 111                 a[pre].setdefault(post, 0)
 112                 a[pre][post] += b[pre][post]
 113         return a
 114
 115     @classmethod
 116     def for_book(cls, book, length=3):
 117         # count from this book only
 118         print 'for_book', book
 119         output = StringIO()
 120         f = open(book.xml_file.path)
 121         text.transform(f, output, False, ('raw-text',))
 122         f.close()
 123         conts = {}
 124         last_word = ''
 125         for letter in output.getvalue().decode('utf-8').strip().lower():
 126             mydict = conts.setdefault(last_word, {})
 127             mydict.setdefault(letter, 0)
 128             mydict[letter] += 1
 129             last_word = last_word[-length+1:] + letter
 130         # add children
 131         return reduce(cls.join_conts,
 132                       (cls.get(child) for child in book.children.all()),
 133                       conts)
 134
 135     @classmethod
 136     def for_set(cls, tag):
 137         # book contains its descendants, we don't want them twice
 138         books = Book.tagged.with_any((tag,))
 139         l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
 140         descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
 141         if descendants_keys:
 142             books = books.exclude(pk__in=descendants_keys)
 143
 144         cont_tabs = (cls.get(b) for b in books)
 145         return reduce(cls.join_conts, cont_tabs)
 146
 147     @classmethod
 148     def get(cls, sth):
 149         object_type = ContentType.objects.get_for_model(sth)
 150         try:
 151             obj = cls.objects.get(content_type=object_type, object_id=sth.id)
 152             f = open(obj.pickle.path)
 153             conts = cPickle.load(f)
 154             f.close()
 155             return conts
 156         except cls.DoesNotExist:
 157             if isinstance(sth, Book):
 158                 conts = cls.for_book(sth)
 159             elif isinstance(sth, Tag):
 160                 conts = cls.for_set(sth)
 161             else:
 162                 raise NotImplemented('Lesmianator continuations: only Book and Tag supported')
 163
 164             c = cls(content_object=sth)
 165             c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps(conts)))
 166             c.save()
 167             return conts
 168
 169