More tolerance for bad url metadata.

[wolnelektury.git] / apps / lesmianator / models.py
diff --git a/apps/lesmianator/models.py b/apps/lesmianator/models.py
index 4b3daf8..83e8213 100644
--- a/apps/lesmianator/models.py
+++ b/apps/lesmianator/models.py
@@ -9,17 +9,15 @@ from StringIO import StringIO
  
  from django.core.files.base import ContentFile
  from django.db import models
-from django.db.models import permalink
+from django.utils.timezone import utc
  from django.utils.translation import ugettext_lazy as _
  from django.core.urlresolvers import reverse
-from django.db.models.signals import m2m_changed
  from django.contrib.auth.models import User
  from django.contrib.contenttypes.models import ContentType
  from django.contrib.contenttypes import generic
  from django.conf import settings
  
-from librarian import text
-from catalogue.fields import JSONField
+from jsonfield import JSONField
  from catalogue.models import Book, Tag
  
  
@@ -41,52 +39,59 @@ class Poem(models.Model):
  
      def visit(self):
          self.view_count += 1
-        self.seen_at = datetime.now()
+        self.seen_at = datetime.utcnow().replace(tzinfo=utc)
          self.save()
  
      def __unicode__(self):
          return "%s (%s...)" % (self.slug, self.text[:20])
  
-    @classmethod
-    def write(cls, continuations=None, length=3, maxlen=1000):
-        def choose_word(word, continuations):
-            try:
-                choices = sum((continuations[word][post] for post in continuations[word]))
-                r = randint(0, choices - 1)
+    @staticmethod
+    def choose_letter(word, continuations):
+        if word not in continuations:
+            return u'\n'
  
-                for post in continuations[word]:
-                    r -= continuations[word][post]
-                    if r < 0:
-                        return post
-            except KeyError:
-                return ''
+        choices = sum((continuations[word][letter]
+                       for letter in continuations[word]))
+        r = randint(0, choices - 1)
  
+        for letter in continuations[word]:
+            r -= continuations[word][letter]
+            if r < 0:
+                return letter
  
+    @classmethod
+    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
          if continuations is None:
              continuations = cls.global_dictionary
+        if not continuations:
+            return ''
  
          letters = []
          word = u''
-        empty = -10
-        lines = 0
-        if not continuations:
-            maxlen = 0
-        # want at least two lines, but let Lesmianator end his stanzas
-        while (empty < 2 or lines < 2) and maxlen:
-            letter = choose_word(word, continuations)
+
+        finished_stanza_verses = 0
+        current_stanza_verses = 0
+        verse_start = True
+
+        char_count = 0
+
+        # do `min_lines' non-empty verses and then stop,
+        # but let Lesmianator finish his last stanza.
+        while finished_stanza_verses < min_lines and char_count < maxlen:
+            letter = cls.choose_letter(word, continuations)
              letters.append(letter)
-            word = word[-length+1:] + letter
+            word = word[-length + 1:] + letter
+            char_count += 1
+
              if letter == u'\n':
-                # count non-empty lines
-                if empty == 0:
-                    lines += 1
-                # 
-                if lines >= 2:
-                    empty += 1
-                lines += 1
+                if verse_start:
+                    finished_stanza_verses += current_stanza_verses
+                    current_stanza_verses = 0
+                else:
+                    current_stanza_verses += 1
+                    verse_start = True
              else:
-                empty = 0
-            maxlen -= 1
+                verse_start = False
  
          return ''.join(letters).strip()
  
@@ -100,6 +105,9 @@ class Continuations(models.Model):
      object_id = models.PositiveIntegerField()
      content_object = generic.GenericForeignKey('content_type', 'object_id')
  
+    class Meta:
+        unique_together = (('content_type', 'object_id'), )
+
      def __unicode__(self):
          return "Continuations for: %s" % unicode(self.content_object)
  
@@ -115,33 +123,33 @@ class Continuations(models.Model):
      @classmethod
      def for_book(cls, book, length=3):
          # count from this book only
-        print 'for_book', book
          output = StringIO()
-        f = open(book.xml_file.path)
-        text.transform(f, output, False, ('raw-text',))
-        f.close()
+        wldoc = book.wldocument(parse_dublincore=False)
+        output = wldoc.as_text(('raw-text',)).get_string()
+        del wldoc
+
          conts = {}
          last_word = ''
-        for letter in output.getvalue().decode('utf-8').strip().lower():
+        for letter in output.decode('utf-8').strip().lower():
              mydict = conts.setdefault(last_word, {})
              mydict.setdefault(letter, 0)
              mydict[letter] += 1
              last_word = last_word[-length+1:] + letter
          # add children
-        return reduce(cls.join_conts, 
-                      (cls.get(child) for child in book.children.all()),
+        return reduce(cls.join_conts,
+                      (cls.get(child) for child in book.children.all().iterator()),
                        conts)
  
      @classmethod
      def for_set(cls, tag):
          # book contains its descendants, we don't want them twice
          books = Book.tagged.with_any((tag,))
-        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
-        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
+        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
+        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
          if descendants_keys:
              books = books.exclude(pk__in=descendants_keys)
  
-        cont_tabs = (cls.get(b) for b in books)
+        cont_tabs = (cls.get(b) for b in books.iterator())
          return reduce(cls.join_conts, cont_tabs)
  
      @classmethod
@@ -149,14 +157,15 @@ class Continuations(models.Model):
          object_type = ContentType.objects.get_for_model(sth)
          should_keys = set([sth.id])
          if isinstance(sth, Tag):
-            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)))
+            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator())
          try:
              obj = cls.objects.get(content_type=object_type, object_id=sth.id)
+            if not obj.pickle:
+                raise cls.DoesNotExist
              f = open(obj.pickle.path)
              keys, conts = cPickle.load(f)
              f.close()
              if set(keys) != should_keys:
-                obj.delete()
                  raise cls.DoesNotExist
              return conts
          except cls.DoesNotExist:
@@ -167,7 +176,7 @@ class Continuations(models.Model):
              else:
                  raise NotImplemented('Lesmianator continuations: only Book and Tag supported')
  
-            c = cls(content_object=sth)
+            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
              c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
              c.save()
              return conts