lesmianator management
author Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Fri, 29 Oct 2010 11:08:04 +0000 (13:08 +0200)
committer Radek Czajka <radoslaw.czajka@nowoczesnapolska.org.pl>
Fri, 29 Oct 2010 12:01:37 +0000 (14:01 +0200)
apps/lesmianator/management/__init__.py [new file with mode: 0644]
apps/lesmianator/management/commands/__init__.py [new file with mode: 0644]
apps/lesmianator/management/commands/lesmianator.py [new file with mode: 0644]
apps/lesmianator/views.py

diff --git a/apps/lesmianator/management/__init__.py b/apps/lesmianator/management/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/apps/lesmianator/management/commands/__init__.py b/apps/lesmianator/management/commands/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/apps/lesmianator/management/commands/lesmianator.py b/apps/lesmianator/management/commands/lesmianator.py
new file mode 100644 (file)
index 0000000..36d7144
--- /dev/null
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
+import sys
+from cPickle import load, dump
+from optparse import make_option
+
+from django.core.management.base import BaseCommand
+from django.core.management.color import color_style
+from django.conf import settings
+
+from catalogue.models import Book, Tag
+
+
+class Command(BaseCommand):
+    """Management command that prepares the data file used by Lesmianator.
+
+    For each selected book that has a TXT file, the command counts how often
+    every character follows each window of up to three preceding characters,
+    and pickles the resulting nested dict to ``settings.LESMIANATOR_PICKLE``.
+    """
+    option_list = BaseCommand.option_list + (
+        make_option('-t', '--tags', dest='tags', metavar='SLUG,...',
+            help='Use only books tagged with this tags'),
+        make_option('-i', '--include', dest='include', metavar='SLUG,...',
+            help='Include specific books by slug'),
+        make_option('-e', '--exclude', dest='exclude', metavar='SLUG,...',
+            help='Exclude specific books by slug')
+    )
+    help = 'Prepare data for Lesmianator.'
+
+    def handle(self, *args, **options):
+        """Build the character-continuation table and dump it to disk.
+
+        Book selection:
+          * ``include`` -- comma-separated slugs of books to add explicitly;
+          * ``tags`` -- comma-separated tag slugs; books returned by
+            ``Book.tagged.with_all`` for those tags are added;
+          * with neither option, all books are used;
+          * ``exclude`` -- comma-separated slugs removed from the selection.
+
+        The result maps ``preceding characters`` (at most three, lowercased)
+        to a dict of ``next character -> number of occurrences``.
+
+        Problems (missing setting, no usable books, unwritable output file)
+        are reported on stdout and the command returns without raising.
+        """
+        self.style = color_style()
+        verbose = int(options.get('verbosity', 1))
+        tags = options.get('tags')
+        include = options.get('include')
+        exclude = options.get('exclude')
+
+        # Only a missing setting is an expected failure here; anything else
+        # should surface instead of being reported as "not set".
+        try:
+            path = settings.LESMIANATOR_PICKLE
+        except AttributeError:
+            print self.style.ERROR('LESMIANATOR_PICKLE not set in the settings.')
+            return
+
+        books = []
+
+        if include:
+            books += list(Book.objects.filter(slug__in=include.split(',')).only('slug', 'txt_file'))
+
+        if tags:
+            books += list(Book.tagged.with_all(Tag.objects.filter(slug__in=tags.split(','))).only('slug', 'txt_file'))
+        elif not include:
+            books = list(Book.objects.all().only('slug', 'txt_file'))
+
+        if exclude:
+            # Split once, outside the comprehension, and use a set for lookups.
+            excluded = set(exclude.split(','))
+            books = [book for book in books if book.slug not in excluded]
+
+        # A book may be matched by both --include and --tags; count it once.
+        books = set(books)
+
+        lesmianator = {}
+        processed = skipped = 0
+        for book in books:
+            if verbose >= 2:
+                print 'Parsing', book.slug
+            if not book.txt_file:
+                if verbose >= 1:
+                    print self.style.NOTICE('%s has no TXT file' % book.slug)
+                skipped += 1
+                continue
+            processed += 1
+            # The context window starts empty for every book.
+            last_word = ''
+            for number, line in enumerate(book.txt_file):
+                # The first 17 lines are skipped -- presumably a fixed header
+                # in the generated TXT files; TODO confirm against the exporter.
+                if number < 17:
+                    continue
+                line = unicode(line, 'utf-8').lower()
+                for letter in line:
+                    mydict = lesmianator.setdefault(last_word, {})
+                    mydict[letter] = mydict.get(letter, 0) + 1
+                    # Keep at most the three most recent characters as context.
+                    last_word = last_word[-2:] + letter
+
+        if not processed:
+            if skipped:
+                print self.style.ERROR("No books with TXT files found")
+            else:
+                print self.style.ERROR("No books found")
+            return
+
+        # Dump exactly once, in binary mode, and always close the file.
+        # try/finally is nested inside try/except (rather than merged or
+        # replaced by ``with``) to stay valid on older Python 2 releases.
+        try:
+            output = open(path, 'wb')
+            try:
+                dump(lesmianator, output)
+            finally:
+                output.close()
+        except (IOError, OSError):
+            print self.style.ERROR("Couldn't write to %s" % path)
+            return
+
+        if verbose >= 1:
+            print "%d processed, %d skipped" % (processed, skipped)
+            print "Results dumped do %s" % path
diff --git a/apps/lesmianator/views.py b/apps/lesmianator/views.py
index 8936772..2d6d53f 100644 (file)
@@ -5,8 +5,6 @@ from django.shortcuts import render_to_response
 from django.template import RequestContext
 from random import randint
 
-import os.path
-
 
 def _choose_word(word):
     try:
@@ -21,8 +19,10 @@ def _choose_word(word):
         return ''
 
 # load dictionary on start, it won't change
+from django.conf import settings
+
 try:
-    f = open(os.path.join(os.path.dirname(__file__), 'dictionary.p'))
+    f = open(settings.LESMIANATOR_PICKLE)
     _dictionary = cPickle.load(f)
 except:
     _dictionary = {}