More tolerance for bad url metadata.
[wolnelektury.git] / apps / lesmianator / models.py
index 4b3daf8..83e8213 100644 (file)
@@ -9,17 +9,15 @@ from StringIO import StringIO
 
 from django.core.files.base import ContentFile
 from django.db import models
 
 from django.core.files.base import ContentFile
 from django.db import models
-from django.db.models import permalink
+from django.utils.timezone import utc
 from django.utils.translation import ugettext_lazy as _
 from django.core.urlresolvers import reverse
 from django.utils.translation import ugettext_lazy as _
 from django.core.urlresolvers import reverse
-from django.db.models.signals import m2m_changed
 from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
 from django.contrib.contenttypes import generic
 from django.conf import settings
 
 from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
 from django.contrib.contenttypes import generic
 from django.conf import settings
 
-from librarian import text
-from catalogue.fields import JSONField
+from jsonfield import JSONField
 from catalogue.models import Book, Tag
 
 
 from catalogue.models import Book, Tag
 
 
@@ -41,52 +39,59 @@ class Poem(models.Model):
 
     def visit(self):
         self.view_count += 1
 
     def visit(self):
         self.view_count += 1
-        self.seen_at = datetime.now()
+        self.seen_at = datetime.utcnow().replace(tzinfo=utc)
         self.save()
 
     def __unicode__(self):
         return "%s (%s...)" % (self.slug, self.text[:20])
 
         self.save()
 
     def __unicode__(self):
         return "%s (%s...)" % (self.slug, self.text[:20])
 
-    @classmethod
-    def write(cls, continuations=None, length=3, maxlen=1000):
-        def choose_word(word, continuations):
-            try:
-                choices = sum((continuations[word][post] for post in continuations[word]))
-                r = randint(0, choices - 1)
+    @staticmethod
+    def choose_letter(word, continuations):
+        if word not in continuations:
+            return u'\n'
 
 
-                for post in continuations[word]:
-                    r -= continuations[word][post]
-                    if r < 0:
-                        return post
-            except KeyError:
-                return ''
+        choices = sum((continuations[word][letter]
+                       for letter in continuations[word]))
+        r = randint(0, choices - 1)
 
 
+        for letter in continuations[word]:
+            r -= continuations[word][letter]
+            if r < 0:
+                return letter
 
 
+    @classmethod
+    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
         if continuations is None:
             continuations = cls.global_dictionary
         if continuations is None:
             continuations = cls.global_dictionary
+        if not continuations:
+            return ''
 
         letters = []
         word = u''
 
         letters = []
         word = u''
-        empty = -10
-        lines = 0
-        if not continuations:
-            maxlen = 0
-        # want at least two lines, but let Lesmianator end his stanzas
-        while (empty < 2 or lines < 2) and maxlen:
-            letter = choose_word(word, continuations)
+
+        finished_stanza_verses = 0
+        current_stanza_verses = 0
+        verse_start = True
+
+        char_count = 0
+
+        # do `min_lines' non-empty verses and then stop,
+        # but let Lesmianator finish his last stanza.
+        while finished_stanza_verses < min_lines and char_count < maxlen:
+            letter = cls.choose_letter(word, continuations)
             letters.append(letter)
             letters.append(letter)
-            word = word[-length+1:] + letter
+            word = word[-length + 1:] + letter
+            char_count += 1
+
             if letter == u'\n':
             if letter == u'\n':
-                # count non-empty lines
-                if empty == 0:
-                    lines += 1
-                # 
-                if lines >= 2:
-                    empty += 1
-                lines += 1
+                if verse_start:
+                    finished_stanza_verses += current_stanza_verses
+                    current_stanza_verses = 0
+                else:
+                    current_stanza_verses += 1
+                    verse_start = True
             else:
             else:
-                empty = 0
-            maxlen -= 1
+                verse_start = False
 
         return ''.join(letters).strip()
 
 
         return ''.join(letters).strip()
 
@@ -100,6 +105,9 @@ class Continuations(models.Model):
     object_id = models.PositiveIntegerField()
     content_object = generic.GenericForeignKey('content_type', 'object_id')
 
     object_id = models.PositiveIntegerField()
     content_object = generic.GenericForeignKey('content_type', 'object_id')
 
+    class Meta:
+        unique_together = (('content_type', 'object_id'), )
+
     def __unicode__(self):
         return "Continuations for: %s" % unicode(self.content_object)
 
     def __unicode__(self):
         return "Continuations for: %s" % unicode(self.content_object)
 
@@ -115,33 +123,33 @@ class Continuations(models.Model):
     @classmethod
     def for_book(cls, book, length=3):
         # count from this book only
     @classmethod
     def for_book(cls, book, length=3):
         # count from this book only
-        print 'for_book', book
         output = StringIO()
         output = StringIO()
-        f = open(book.xml_file.path)
-        text.transform(f, output, False, ('raw-text',))
-        f.close()
+        wldoc = book.wldocument(parse_dublincore=False)
+        output = wldoc.as_text(('raw-text',)).get_string()
+        del wldoc
+
         conts = {}
         last_word = ''
         conts = {}
         last_word = ''
-        for letter in output.getvalue().decode('utf-8').strip().lower():
+        for letter in output.decode('utf-8').strip().lower():
             mydict = conts.setdefault(last_word, {})
             mydict.setdefault(letter, 0)
             mydict[letter] += 1
             last_word = last_word[-length+1:] + letter
         # add children
             mydict = conts.setdefault(last_word, {})
             mydict.setdefault(letter, 0)
             mydict[letter] += 1
             last_word = last_word[-length+1:] + letter
         # add children
-        return reduce(cls.join_conts, 
-                      (cls.get(child) for child in book.children.all()),
+        return reduce(cls.join_conts,
+                      (cls.get(child) for child in book.children.all().iterator()),
                       conts)
 
     @classmethod
     def for_set(cls, tag):
         # book contains its descendants, we don't want them twice
         books = Book.tagged.with_any((tag,))
                       conts)
 
     @classmethod
     def for_set(cls, tag):
         # book contains its descendants, we don't want them twice
         books = Book.tagged.with_any((tag,))
-        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
-        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
+        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
+        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
         if descendants_keys:
             books = books.exclude(pk__in=descendants_keys)
 
         if descendants_keys:
             books = books.exclude(pk__in=descendants_keys)
 
-        cont_tabs = (cls.get(b) for b in books)
+        cont_tabs = (cls.get(b) for b in books.iterator())
         return reduce(cls.join_conts, cont_tabs)
 
     @classmethod
         return reduce(cls.join_conts, cont_tabs)
 
     @classmethod
@@ -149,14 +157,15 @@ class Continuations(models.Model):
         object_type = ContentType.objects.get_for_model(sth)
         should_keys = set([sth.id])
         if isinstance(sth, Tag):
         object_type = ContentType.objects.get_for_model(sth)
         should_keys = set([sth.id])
         if isinstance(sth, Tag):
-            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)))
+            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator())
         try:
             obj = cls.objects.get(content_type=object_type, object_id=sth.id)
         try:
             obj = cls.objects.get(content_type=object_type, object_id=sth.id)
+            if not obj.pickle:
+                raise cls.DoesNotExist
             f = open(obj.pickle.path)
             keys, conts = cPickle.load(f)
             f.close()
             if set(keys) != should_keys:
             f = open(obj.pickle.path)
             keys, conts = cPickle.load(f)
             f.close()
             if set(keys) != should_keys:
-                obj.delete()
                 raise cls.DoesNotExist
             return conts
         except cls.DoesNotExist:
                 raise cls.DoesNotExist
             return conts
         except cls.DoesNotExist:
@@ -167,7 +176,7 @@ class Continuations(models.Model):
             else:
                 raise NotImplemented('Lesmianator continuations: only Book and Tag supported')
 
             else:
                 raise NotImplemented('Lesmianator continuations: only Book and Tag supported')
 
-            c = cls(content_object=sth)
+            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
             c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
             c.save()
             return conts
             c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
             c.save()
             return conts