fix
[wolnelektury.git] / apps / lesmianator / models.py
index 0a59dde..1b53efd 100644 (file)
@@ -18,7 +18,6 @@ from django.contrib.contenttypes.models import ContentType
 from django.contrib.contenttypes import generic
 from django.conf import settings
 
-from librarian import text
 from catalogue.fields import JSONField
 from catalogue.models import Book, Tag
 
@@ -47,46 +46,53 @@ class Poem(models.Model):
     def __unicode__(self):
         return "%s (%s...)" % (self.slug, self.text[:20])
 
-    @classmethod
-    def write(cls, continuations=None, length=3, maxlen=1000):
-        def choose_word(word, continuations):
-            try:
-                choices = sum((continuations[word][post] for post in continuations[word]))
-                r = randint(0, choices - 1)
+    @staticmethod
+    def choose_letter(word, continuations):
+        if word not in continuations:
+            return u'\n'
 
-                for post in continuations[word]:
-                    r -= continuations[word][post]
-                    if r < 0:
-                        return post
-            except KeyError:
-                return ''
+        choices = sum((continuations[word][letter]
+                       for letter in continuations[word]))
+        r = randint(0, choices - 1)
 
+        for letter in continuations[word]:
+            r -= continuations[word][letter]
+            if r < 0:
+                return letter
 
+    @classmethod
+    def write(cls, continuations=None, length=3, min_lines=2, maxlen=1000):
         if continuations is None:
             continuations = cls.global_dictionary
+        if not continuations:
+            return ''
 
         letters = []
         word = u''
-        empty = -10
-        lines = 0
-        if not continuations:
-            maxlen = 0
-        # want at least two lines, but let Lesmianator end his stanzas
-        while (empty < 2 or lines < 2) and maxlen:
-            letter = choose_word(word, continuations)
+
+        finished_stanza_verses = 0
+        current_stanza_verses = 0
+        verse_start = True
+
+        char_count = 0
+
+        # Produce at least `min_lines` non-empty verses and then stop,
+        # but let Lesmianator finish his last stanza (up to `maxlen` chars).
+        while finished_stanza_verses < min_lines and char_count < maxlen:
+            letter = cls.choose_letter(word, continuations)
             letters.append(letter)
-            word = word[-length+1:] + letter
+            word = word[-length + 1:] + letter
+            char_count += 1
+
             if letter == u'\n':
-                # count non-empty lines
-                if empty == 0:
-                    lines += 1
-                # 
-                if lines >= 2:
-                    empty += 1
-                lines += 1
+                if verse_start:
+                    finished_stanza_verses += current_stanza_verses
+                    current_stanza_verses = 0
+                else:
+                    current_stanza_verses += 1
+                    verse_start = True
             else:
-                empty = 0
-            maxlen -= 1
+                verse_start = False
 
         return ''.join(letters).strip()
 
@@ -119,12 +125,13 @@ class Continuations(models.Model):
     def for_book(cls, book, length=3):
         # count from this book only
         output = StringIO()
-        f = open(book.xml_file.path)
-        text.transform(f, output, False, ('raw-text',))
-        f.close()
+        wldoc = book.wldocument(parse_dublincore=False)
+        output = wldoc.as_text(('raw-text',)).get_string()
+        del wldoc
+
         conts = {}
         last_word = ''
-        for letter in output.getvalue().decode('utf-8').strip().lower():
+        for letter in output.decode('utf-8').strip().lower():
             mydict = conts.setdefault(last_word, {})
             mydict.setdefault(letter, 0)
             mydict[letter] += 1