Fix reader.

[wolnelektury.git] / apps / lesmianator / models.py
diff --git a/apps/lesmianator/models.py b/apps/lesmianator/models.py

index dce4b15..7fceda2 100644 (file)
--- a/apps/lesmianator/models.py
+++ b/apps/lesmianator/models.py
@@ -18,15 +18,14 @@ from django.contrib.contenttypes.models import ContentType
  from django.contrib.contenttypes import generic
  from django.conf import settings
  
  from django.contrib.contenttypes import generic
  from django.conf import settings
  
-from librarian import text
-from catalogue.fields import JSONField
+from jsonfield import JSONField
  from catalogue.models import Book, Tag
  
  
  class Poem(models.Model):
      slug = models.SlugField(_('slug'), max_length=120, db_index=True)
      text = models.TextField(_('text'))
  from catalogue.models import Book, Tag
  
  
  class Poem(models.Model):
      slug = models.SlugField(_('slug'), max_length=120, db_index=True)
      text = models.TextField(_('text'))
-    created_by = models.ForeignKey(User)
+    created_by = models.ForeignKey(User, null=True)
      created_from = JSONField(_('extra information'), null=True, blank=True)
      created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
      seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
      created_from = JSONField(_('extra information'), null=True, blank=True)
      created_at = models.DateTimeField(_('creation date'), auto_now_add=True, editable=False)
      seen_at = models.DateTimeField(_('last view date'), auto_now_add=True, editable=False)
@@ -47,46 +46,53 @@ class Poem(models.Model):
      def __unicode__(self):
          return "%s (%s...)" % (self.slug, self.text[:20])
  
      def __unicode__(self):
          return "%s (%s...)" % (self.slug, self.text[:20])
  
-    @classmethod
-    def write(cls, continuations=None, length=3, maxlen=1000):
-        def choose_word(word, continuations):
-            try:
-                choices = sum((continuations[word][post] for post in continuations[word]))
-                r = randint(0, choices - 1)
+    @staticmethod
+    def choose_letter(word, continuations):
+        if word not in continuations:
+            return u'\n'
  
  
-                for post in continuations[word]:
-                    r -= continuations[word][post]
-                    if r < 0:
-                        return post
-            except KeyError:
-                return ''
+        choices = sum((continuations[word][letter]
+                       for letter in continuations[word]))
+        r = randint(0, choices - 1)
  
  
+        for letter in continuations[word]:
+            r -= continuations[word][letter]
+            if r < 0:
+                return letter
  
  
+    @classmethod
+    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
          if continuations is None:
              continuations = cls.global_dictionary
          if continuations is None:
              continuations = cls.global_dictionary
+        if not continuations:
+            return ''
  
          letters = []
          word = u''
  
          letters = []
          word = u''
-        empty = -10
-        lines = 0
-        if not continuations:
-            maxlen = 0
-        # want at least two lines, but let Lesmianator end his stanzas
-        while (empty < 2 or lines < 2) and maxlen:
-            letter = choose_word(word, continuations)
+
+        finished_stanza_verses = 0
+        current_stanza_verses = 0
+        verse_start = True
+
+        char_count = 0
+
+        # do `min_lines' non-empty verses and then stop,
+        # but let Lesmianator finish his last stanza.
+        while finished_stanza_verses < min_lines and char_count < maxlen:
+            letter = cls.choose_letter(word, continuations)
              letters.append(letter)
              letters.append(letter)
-            word = word[-length+1:] + letter
+            word = word[-length + 1:] + letter
+            char_count += 1
+
              if letter == u'\n':
              if letter == u'\n':
-                # count non-empty lines
-                if empty == 0:
-                    lines += 1
-                # 
-                if lines >= 2:
-                    empty += 1
-                lines += 1
+                if verse_start:
+                    finished_stanza_verses += current_stanza_verses
+                    current_stanza_verses = 0
+                else:
+                    current_stanza_verses += 1
+                    verse_start = True
              else:
              else:
-                empty = 0
-            maxlen -= 1
+                verse_start = False
  
          return ''.join(letters).strip()
  
  
          return ''.join(letters).strip()
  
@@ -100,6 +106,9 @@ class Continuations(models.Model):
      object_id = models.PositiveIntegerField()
      content_object = generic.GenericForeignKey('content_type', 'object_id')
  
      object_id = models.PositiveIntegerField()
      content_object = generic.GenericForeignKey('content_type', 'object_id')
  
+    class Meta:
+        unique_together = (('content_type', 'object_id'), )
+
      def __unicode__(self):
          return "Continuations for: %s" % unicode(self.content_object)
  
      def __unicode__(self):
          return "Continuations for: %s" % unicode(self.content_object)
  
@@ -115,43 +124,50 @@ class Continuations(models.Model):
      @classmethod
      def for_book(cls, book, length=3):
          # count from this book only
      @classmethod
      def for_book(cls, book, length=3):
          # count from this book only
-        print 'for_book', book
          output = StringIO()
          output = StringIO()
-        f = open(book.xml_file.path)
-        text.transform(f, output, False, ('raw-text',))
-        f.close()
+        wldoc = book.wldocument(parse_dublincore=False)
+        output = wldoc.as_text(('raw-text',)).get_string()
+        del wldoc
+
          conts = {}
          last_word = ''
          conts = {}
          last_word = ''
-        for letter in output.getvalue().decode('utf-8').strip().lower():
+        for letter in output.decode('utf-8').strip().lower():
              mydict = conts.setdefault(last_word, {})
              mydict.setdefault(letter, 0)
              mydict[letter] += 1
              last_word = last_word[-length+1:] + letter
          # add children
          return reduce(cls.join_conts, 
              mydict = conts.setdefault(last_word, {})
              mydict.setdefault(letter, 0)
              mydict[letter] += 1
              last_word = last_word[-length+1:] + letter
          # add children
          return reduce(cls.join_conts, 
-                      (cls.get(child) for child in book.children.all()),
+                      (cls.get(child) for child in book.children.all().iterator()),
                        conts)
  
      @classmethod
      def for_set(cls, tag):
          # book contains its descendants, we don't want them twice
          books = Book.tagged.with_any((tag,))
                        conts)
  
      @classmethod
      def for_set(cls, tag):
          # book contains its descendants, we don't want them twice
          books = Book.tagged.with_any((tag,))
-        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books])
-        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags)]
+        l_tags = Tag.objects.filter(category='book', slug__in=[book.book_tag_slug() for book in books.iterator()])
+        descendants_keys = [book.pk for book in Book.tagged.with_any(l_tags).iterator()]
          if descendants_keys:
              books = books.exclude(pk__in=descendants_keys)
  
          if descendants_keys:
              books = books.exclude(pk__in=descendants_keys)
  
-        cont_tabs = (cls.get(b) for b in books)
+        cont_tabs = (cls.get(b) for b in books.iterator())
          return reduce(cls.join_conts, cont_tabs)
  
      @classmethod
      def get(cls, sth):
          object_type = ContentType.objects.get_for_model(sth)
          return reduce(cls.join_conts, cont_tabs)
  
      @classmethod
      def get(cls, sth):
          object_type = ContentType.objects.get_for_model(sth)
+        should_keys = set([sth.id])
+        if isinstance(sth, Tag):
+            should_keys = set(b.pk for b in Book.tagged.with_any((sth,)).iterator())
          try:
              obj = cls.objects.get(content_type=object_type, object_id=sth.id)
          try:
              obj = cls.objects.get(content_type=object_type, object_id=sth.id)
+            if not obj.pickle:
+                raise cls.DoesNotExist
              f = open(obj.pickle.path)
              f = open(obj.pickle.path)
-            conts = cPickle.load(f)
+            keys, conts = cPickle.load(f)
              f.close()
              f.close()
+            if set(keys) != should_keys:
+                raise cls.DoesNotExist
              return conts
          except cls.DoesNotExist:
              if isinstance(sth, Book):
              return conts
          except cls.DoesNotExist:
              if isinstance(sth, Book):
@@ -161,8 +177,8 @@ class Continuations(models.Model):
              else:
                  raise NotImplemented('Lesmianator continuations: only Book and Tag supported')
  
              else:
                  raise NotImplemented('Lesmianator continuations: only Book and Tag supported')
  
-            c = cls(content_object=sth)
-            c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps(conts)))
+            c, created = cls.objects.get_or_create(content_type=object_type, object_id=sth.id)
+            c.pickle.save(sth.slug+'.p', ContentFile(cPickle.dumps((should_keys, conts))))
              c.save()
              return conts
  
              c.save()
              return conts