X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/7e890833260e5aef8ae217d195309b76fa91c02f..1bf869129aa603b63574240385dd76d8975bfa34:/apps/lesmianator/models.py diff --git a/apps/lesmianator/models.py b/apps/lesmianator/models.py index dce4b15b1..83e82135c 100644 --- a/apps/lesmianator/models.py +++ b/apps/lesmianator/models.py @@ -9,24 +9,22 @@ from StringIO import StringIO from django.core.files.base import ContentFile from django.db import models -from django.db.models import permalink +from django.utils.timezone import utc from django.utils.translation import ugettext_lazy as _ from django.core.urlresolvers import reverse -from django.db.models.signals import m2m_changed from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes import generic from django.conf import settings -from librarian import text -from catalogue.fields import JSONField +from jsonfield import JSONField from catalogue.models import Book, Tag class Poem(models.Model): slug = models.SlugField(_('slug'), max_length=120, db_index=True) text = models.TextField(_('text')) - created_by = models.ForeignKey(User) + created_by = models.ForeignKey(User, null=True) created_from = JSONField(_('extra information'), null=True, blank=True) created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False) seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False) @@ -41,52 +39,59 @@ class Poem(models.Model): def visit(self): self.view_count += 1 - self.seen_at = datetime.now() + self.seen_at = datetime.utcnow().replace(tzinfo=utc) self.save() def __unicode__(self): return "%s (%s...)" % (self.slug, self.text[:20]) - @classmethod - def write(cls, continuations=None, length=3, maxlen=1000): - def choose_word(word, continuations): - try: - choices = sum((continuations[word][post] for post in continuations[word])) - r = randint(0, choices - 1) + @staticmethod + def choose_letter(word, continuations): + if word not in continuations: + return u'\n' - for post in continuations[word]: - r -= continuations[word][post] - if r < 0: - return post - except KeyError: - return '' + choices = sum((continuations[word][letter] + for letter in continuations[word])) + r = randint(0, choices - 1) + for letter in continuations[word]: + r -= continuations[word][letter] + if r < 0: + return letter + @classmethod + def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000): if continuations is None: continuations = cls.global_dictionary + if not continuations: + return '' letters = [] word = u'' - empty = -10 - lines = 0 - if not continuations: - maxlen = 0 - # want at least two lines, but let Lesmianator end his stanzas - while (empty < 2 or lines < 2) and maxlen: - letter = choose_word(word, continuations) + + finished_stanza_verses = 0 + current_stanza_verses = 0 + verse_start = True + + char_count = 0 + + # do `min_lines' non-empty verses and then stop, + # but let Lesmianator finish his last stanza. + while finished_stanza_verses < min_lines and char_count < maxlen: + letter = cls.choose_letter(word, continuations) letters.append(letter) - word = word[-length+1:] + letter + word = word[-length + 1:] + letter + char_count += 1 + if letter == u'\n': - # count non-empty lines - if empty == 0: - lines += 1 - # - if lines >= 2: - empty += 1 - lines += 1 + if verse_start: + finished_stanza_verses += current_stanza_verses + current_stanza_verses = 0 + else: + current_stanza_verses += 1 + verse_start = True else: - empty = 0 - maxlen -= 1 + verse_start = False return ''.join(letters).strip() @@ -100,6 +105,9 @@ class Continuations(models.Model): object_id = models.PositiveIntegerField() content_object = generic.GenericForeignKey('content_type', 'object_id') + class Meta: + unique_together = (('content_type', 'object_id'), ) + def __unicode__(self): return "Continuations for: %s" % unicode(self.content_object) @@ -115,43 +123,50 @@ class Continuations(models.Model): @classmethod def for_book(cls, book, length=3): # count from this book only - print 'for_book', book output = StringIO() - f = open(book.xml_file.path) - text.transform(f, output, False, ('raw-text',)) - f.close() + wldoc = book.wldocument(parse_dublincore=False) + output = wldoc.as_text(('raw-text',)).get_string() + del wldoc + conts = {} last_word = '' - for letter in output.getvalue().decode('utf-8').strip().lower(): + for letter in output.decode('utf-8').strip().lower(): mydict = conts.setdefault(last_word, {}) mydict.setdefault(letter, 0) mydict[letter] += 1 last_word = last_word[-length+1:] + letter # add children - return reduce(cls.join_conts, - (cls.get(child) for child in book.children.all()), + return reduce(cls.join_conts, + (cls.get(child) for child in book.children.all().iterator()), conts) @classmethod def for_set(cls, tag): # book contains its descendants, we don't want them twice books = Book.tagged.with_any((tag,)) - l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books]) - descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)] + l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()]) + descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()] if descendants_keys: books = books.exclude(pk__in=descendants_keys) - cont_tabs = (cls.get(b) for b in books) + cont_tabs = (cls.get(b) for b in books.iterator()) return reduce(cls.join_conts, cont_tabs) @classmethod def get(cls, sth): object_type = ContentType.objects.get_for_model(sth) + should_keys = set([sth.id]) + if isinstance(sth, Tag): + should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator()) try: obj = cls.objects.get(content_type=object_type, object_id=sth.id) + if not obj.pickle: + raise cls.DoesNotExist f = open(obj.pickle.path) - conts = cPickle.load(f) + keys, conts = cPickle.load(f) f.close() + if set(keys) != should_keys: + raise cls.DoesNotExist return conts except cls.DoesNotExist: if isinstance(sth, Book): @@ -161,8 +176,8 @@ class Continuations(models.Model): else: raise NotImplemented('Lesmianator continuations: only Book and Tag supported') - c = cls(content_object=sth) - c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps(conts))) + c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id) + c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts)))) c.save() return conts